import pandas as pd
import numpy as np
import statsmodels.api as sm
from linearmodels import PanelOLS
from linearmodels import RandomEffects
from scipy import stats
import statsmodels.formula.api as statf
import plotly.express as px
import seaborn as sns
from linearmodels.panel import compare
from tqdm.notebook import tqdm
from IPython import display as ICD
from patsy import dmatrices
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.stats.diagnostic import het_white, het_breuschpagan
import plotly.offline as pyo
pyo.init_notebook_mode()
Связь между HDI и I-desi¶
Предобработка и очистка данных¶
Скачиваем iso код для стран¶
# ISO country reference table; its "alpha-2", "alpha-3" and "name" columns
# are used below to attach three-letter codes and names to the DESI rows.
countries_code = pd.read_csv(filepath_or_buffer="all.csv")
countries_code
| name | alpha-2 | alpha-3 | country-code | iso_3166-2 | region | sub-region | intermediate-region | region-code | sub-region-code | intermediate-region-code | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | AF | AFG | 4 | ISO 3166-2:AF | Asia | Southern Asia | NaN | 142.0 | 34.0 | NaN |
| 1 | Åland Islands | AX | ALA | 248 | ISO 3166-2:AX | Europe | Northern Europe | NaN | 150.0 | 154.0 | NaN |
| 2 | Albania | AL | ALB | 8 | ISO 3166-2:AL | Europe | Southern Europe | NaN | 150.0 | 39.0 | NaN |
| 3 | Algeria | DZ | DZA | 12 | ISO 3166-2:DZ | Africa | Northern Africa | NaN | 2.0 | 15.0 | NaN |
| 4 | American Samoa | AS | ASM | 16 | ISO 3166-2:AS | Oceania | Polynesia | NaN | 9.0 | 61.0 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 244 | Wallis and Futuna | WF | WLF | 876 | ISO 3166-2:WF | Oceania | Polynesia | NaN | 9.0 | 61.0 | NaN |
| 245 | Western Sahara | EH | ESH | 732 | ISO 3166-2:EH | Africa | Northern Africa | NaN | 2.0 | 15.0 | NaN |
| 246 | Yemen | YE | YEM | 887 | ISO 3166-2:YE | Asia | Western Asia | NaN | 142.0 | 145.0 | NaN |
| 247 | Zambia | ZM | ZMB | 894 | ISO 3166-2:ZM | Africa | Sub-Saharan Africa | Eastern Africa | 2.0 | 202.0 | 14.0 |
| 248 | Zimbabwe | ZW | ZWE | 716 | ISO 3166-2:ZW | Africa | Sub-Saharan Africa | Eastern Africa | 2.0 | 202.0 | 14.0 |
249 rows × 11 columns
Скачиваем итоговый рейтинг за период по индексу I-desi¶
# Overall DESI score per country and period.
# Built as a non-mutating chain ending in an explicit copy, so the recoding
# below writes to an independent frame rather than to a filtered slice
# (which is what triggers pandas' SettingWithCopyWarning).
desi_total = (
    pd.read_csv("desi_total-data (2).csv")
    .rename(columns={'value': 'total_score_desi'})
    # Drop the 'EU' aggregate row; only individual countries are kept.
    .loc[lambda frame: frame['country'] != 'EU']
    .copy()
)
# 'EL' is recoded to 'GR', the same recoding applied to the sub-index table,
# so that both tables can be joined on `country`.
desi_total['country'] = desi_total['country'].replace("EL", "GR")
desi_total
| period | country | indicator | breakdown | unit | total_score_desi | flags | |
|---|---|---|---|---|---|---|---|
| 0 | 2022 | AT | desi_total | desi_total | pc_desi | 54.675671 | NaN |
| 1 | 2022 | BE | desi_total | desi_total | pc_desi | 50.307388 | NaN |
| 2 | 2022 | BG | desi_total | desi_total | pc_desi | 37.679882 | NaN |
| 3 | 2022 | CY | desi_total | desi_total | pc_desi | 48.352205 | NaN |
| 4 | 2022 | CZ | desi_total | desi_total | pc_desi | 49.143522 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 162 | 2017 | PT | desi_total | desi_total | pc_desi | 35.478987 | NaN |
| 163 | 2017 | RO | desi_total | desi_total | pc_desi | 19.399117 | NaN |
| 164 | 2017 | SE | desi_total | desi_total | pc_desi | 45.711845 | NaN |
| 165 | 2017 | SI | desi_total | desi_total | pc_desi | 35.702736 | NaN |
| 166 | 2017 | SK | desi_total | desi_total | pc_desi | 29.783805 | NaN |
162 rows × 7 columns
Скачиваем показатели по 4 субиндексам I-desi за период¶
# Scores of the DESI sub-indices (the `breakdown` column tells which one).
desi_1 = pd.read_csv("desi-data.csv", sep=',', encoding='ISO-8859-1')
# Recode 'EL' to 'GR' so the rows match the alpha-2 codes used for the joins.
uses_el_code = desi_1['country'].eq("EL")
desi_1.loc[uses_el_code, 'country'] = "GR"
desi_1
| period | country | indicator | breakdown | unit | value | flags | |
|---|---|---|---|---|---|---|---|
| 0 | 2022 | AT | desi | desi_hc | pc_desi | 12.738011 | NaN |
| 1 | 2022 | BE | desi | desi_hc | pc_desi | 12.172748 | NaN |
| 2 | 2022 | BG | desi | desi_hc | pc_desi | 8.147561 | NaN |
| 3 | 2022 | CY | desi | desi_hc | pc_desi | 10.441309 | NaN |
| 4 | 2022 | CZ | desi | desi_hc | pc_desi | 11.397470 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 667 | 2017 | RO | desi | desi_dps | pc_desi | 1.853090 | NaN |
| 668 | 2017 | SE | desi | desi_dps | pc_desi | 14.785592 | NaN |
| 669 | 2017 | SI | desi | desi_dps | pc_desi | 11.599579 | NaN |
| 670 | 2017 | SK | desi | desi_dps | pc_desi | 9.072821 | NaN |
| 671 | 2017 | EU | desi | desi_dps | pc_desi | 11.675391 | NaN |
672 rows × 7 columns
# Attach the three-letter code and country name via the two-letter code ...
iso_lookup = countries_code[["alpha-3", "alpha-2", 'name']]
desi_1 = pd.merge(desi_1, iso_lookup, how='left', left_on='country', right_on='alpha-2')
# ... and the overall DESI score for the same country and period.
total_scores = desi_total[['country', 'period', "total_score_desi"]]
desi_1 = pd.merge(desi_1, total_scores, how='left', on=['country', 'period'])
desi_1
| period | country | indicator | breakdown | unit | value | flags | alpha-3 | alpha-2 | name | total_score_desi | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022 | AT | desi | desi_hc | pc_desi | 12.738011 | NaN | AUT | AT | Austria | 54.675671 |
| 1 | 2022 | BE | desi | desi_hc | pc_desi | 12.172748 | NaN | BEL | BE | Belgium | 50.307388 |
| 2 | 2022 | BG | desi | desi_hc | pc_desi | 8.147561 | NaN | BGR | BG | Bulgaria | 37.679882 |
| 3 | 2022 | CY | desi | desi_hc | pc_desi | 10.441309 | NaN | CYP | CY | Cyprus | 48.352205 |
| 4 | 2022 | CZ | desi | desi_hc | pc_desi | 11.397470 | NaN | CZE | CZ | Czechia | 49.143522 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 667 | 2017 | RO | desi | desi_dps | pc_desi | 1.853090 | NaN | ROU | RO | Romania | 19.399117 |
| 668 | 2017 | SE | desi | desi_dps | pc_desi | 14.785592 | NaN | SWE | SE | Sweden | 45.711845 |
| 669 | 2017 | SI | desi | desi_dps | pc_desi | 11.599579 | NaN | SVN | SI | Slovenia | 35.702736 |
| 670 | 2017 | SK | desi | desi_dps | pc_desi | 9.072821 | NaN | SVK | SK | Slovakia | 29.783805 |
| 671 | 2017 | EU | desi | desi_dps | pc_desi | 11.675391 | NaN | NaN | NaN | NaN | NaN |
672 rows × 11 columns
Скачиваем показатели за период по индексу HDI. Через цикл переносим годы в строки, а показатели субиндексов остаются столбцами¶
# HDI composite-index time series in wide form: one `<metric>_<year>` column
# per metric and year. Only iso3 plus the 2017-2022 columns of the five
# metrics used in this analysis are kept.
hdi_1 = pd.read_csv("HDR23-24_Composite_indices_complete_time_series (2).csv", sep=',', encoding='ISO-8859-1')
hdi_metric_prefixes = ('hdi', 'le', 'eys', 'mys', 'gnipc')
hdi_year_range = range(2017, 2023)
# Metric-major, year-minor order: hdi_2017 ... hdi_2022, le_2017 ..., gnipc_2022.
hdi_wide_columns = ["iso3"] + [
    f"{prefix}_{yr}" for prefix in hdi_metric_prefixes for yr in hdi_year_range
]
hdi_1 = hdi_1[hdi_wide_columns]
hdi_1
| iso3 | hdi_2017 | hdi_2018 | hdi_2019 | hdi_2020 | hdi_2021 | hdi_2022 | le_2017 | le_2018 | le_2019 | ... | mys_2019 | mys_2020 | mys_2021 | mys_2022 | gnipc_2017 | gnipc_2018 | gnipc_2019 | gnipc_2020 | gnipc_2021 | gnipc_2022 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | AFG | 0.485 | 0.486 | 0.492 | 0.488 | 0.473 | 0.462 | 63.016000 | 63.081000 | 63.565000 | ... | 2.698150 | 2.841610 | 2.985070 | 2.514790 | 2123.676477 | 2082.113799 | 2112.986381 | 1986.803772 | 1534.135377 | 1335.205733 |
| 1 | ALB | 0.796 | 0.797 | 0.800 | 0.784 | 0.785 | 0.789 | 79.047000 | 79.184000 | 79.282000 | ... | 10.072996 | 10.121144 | 10.121144 | 10.121144 | 12802.175940 | 13302.733030 | 13481.971800 | 13069.127700 | 14399.780470 | 15293.326510 |
| 2 | DZA | 0.738 | 0.740 | 0.742 | 0.730 | 0.740 | 0.745 | 75.743000 | 76.066000 | 76.474000 | ... | 7.020270 | 6.987444 | 6.987444 | 6.987444 | 11633.272120 | 11438.083580 | 11353.521810 | 10634.883980 | 10823.118130 | 10978.405710 |
| 3 | AND | 0.860 | 0.863 | 0.865 | 0.843 | 0.855 | 0.884 | 82.980000 | 82.992000 | 83.004000 | ... | 11.084770 | 11.260993 | 11.437216 | 11.613440 | 54906.348230 | 54904.290060 | 55035.017860 | 48026.390490 | 51128.347500 | 54233.449480 |
| 4 | AGO | 0.597 | 0.598 | 0.597 | 0.594 | 0.590 | 0.591 | 61.680000 | 62.144000 | 62.448000 | ... | 5.734512 | 5.844292 | 5.844292 | 5.844292 | 6772.546424 | 6310.063799 | 5918.032996 | 5487.669515 | 5332.948130 | 5327.788251 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 201 | ZZG.ECA | 0.793 | 0.798 | 0.802 | 0.792 | 0.797 | 0.802 | 74.343178 | 74.505838 | 74.700669 | ... | 10.556794 | 10.626553 | 10.618775 | 10.609179 | 17282.309560 | 17776.029860 | 18014.099520 | 18036.728660 | 19214.929590 | 19762.981770 |
| 202 | ZZH.LAC | 0.764 | 0.766 | 0.768 | 0.755 | 0.754 | 0.763 | 74.705783 | 74.823396 | 75.014843 | ... | 8.860465 | 8.969176 | 8.973933 | 8.967833 | 15198.560840 | 15131.255810 | 14985.581440 | 13848.678100 | 14646.857190 | 15109.406150 |
| 203 | ZZI.SA | 0.631 | 0.632 | 0.635 | 0.635 | 0.631 | 0.641 | 70.245298 | 70.517337 | 70.722512 | ... | 6.358383 | 6.582805 | 6.612892 | 6.629607 | 6197.721643 | 6460.624904 | 6605.112625 | 6237.998412 | 6638.663046 | 6971.625949 |
| 204 | ZZJ.SSA | 0.540 | 0.544 | 0.549 | 0.547 | 0.546 | 0.549 | 60.348740 | 60.735554 | 61.120022 | ... | 5.898415 | 5.886779 | 5.989415 | 5.975957 | 3688.769761 | 3695.077956 | 3715.500559 | 3570.227167 | 3611.615300 | 3666.202926 |
| 205 | ZZK.WORLD | 0.732 | 0.735 | 0.739 | 0.736 | 0.735 | 0.739 | 72.568987 | 72.816178 | 73.012121 | ... | 8.574383 | 8.686374 | 8.682265 | 8.661598 | 16060.191190 | 16431.285550 | 16713.676430 | 16015.845830 | 16837.841800 | 17254.434040 |
206 rows × 31 columns
# Empty long-format skeleton: one row per (iso3, year), with the five metric
# columns created as None placeholders to be filled in afterwards.
years_of_interest = [2017, 2018, 2019, 2020, 2021, 2022]
indices_of_interest = ['hdi', 'le', 'eys', 'mys', 'gnipc']

# Repeat every code once per year. The repeat count is derived from the year
# list (it used to be a literal 6) so the two cannot drift apart.
n_years = len(years_of_interest)
hdi_1_nonflat = pd.DataFrame({'iso3': list(hdi_1['iso3'].values) * n_years})
# After sorting, the n_years copies of each code sit next to each other ...
hdi_1_nonflat = hdi_1_nonflat.sort_values(['iso3'])
# ... so assigning the year list cyclically (by position) gives each code
# every year exactly once.
hdi_1_nonflat['year'] = years_of_interest * hdi_1.shape[0]
hdi_1_nonflat[indices_of_interest] = None
hdi_1_nonflat = hdi_1_nonflat.reset_index(drop=True)
hdi_1_nonflat
| iso3 | year | hdi | le | eys | mys | gnipc | |
|---|---|---|---|---|---|---|---|
| 0 | AFG | 2017 | None | None | None | None | None |
| 1 | AFG | 2018 | None | None | None | None | None |
| 2 | AFG | 2019 | None | None | None | None | None |
| 3 | AFG | 2020 | None | None | None | None | None |
| 4 | AFG | 2021 | None | None | None | None | None |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 1231 | ZZK.WORLD | 2018 | None | None | None | None | None |
| 1232 | ZZK.WORLD | 2019 | None | None | None | None | None |
| 1233 | ZZK.WORLD | 2020 | None | None | None | None | None |
| 1234 | ZZK.WORLD | 2021 | None | None | None | None | None |
| 1235 | ZZK.WORLD | 2022 | None | None | None | None | None |
1236 rows × 7 columns
# Sanity check: number of distinct years attached to each iso3 code, then how
# many codes share each of those counts.
years_per_code = hdi_1_nonflat.groupby('iso3')['year'].nunique()
years_per_code.value_counts()
year 6 206 Name: count, dtype: int64
# Fill the long table from the wide one: for every metric and year, copy the
# value of column `<metric>_<year>` into the row of the matching iso3 code.
#
# The lookup is done once per (metric, year) with Series.map instead of once
# per (metric, year, country) with a boolean mask over the whole frame.
# drop_duplicates keeps the first row per code, i.e. the same row the scalar
# lookup `.values[0]` would have picked.
wide_by_code = hdi_1.drop_duplicates(subset='iso3').set_index('iso3')
for index in indices_of_interest:
    for year in years_of_interest:
        rows_for_year = hdi_1_nonflat['year'] == year
        # The mapped Series keeps the index of the selected rows, so the
        # assignment aligns row by row.
        hdi_1_nonflat.loc[rows_for_year, index] = (
            hdi_1_nonflat.loc[rows_for_year, 'iso3'].map(wide_by_code[f'{index}_{year}'])
        )
hdi_1_nonflat
| iso3 | year | hdi | le | eys | mys | gnipc | |
|---|---|---|---|---|---|---|---|
| 0 | AFG | 2017 | 0.485 | 63.016 | 10.519565 | 2.41123 | 2123.676477 |
| 1 | AFG | 2018 | 0.486 | 63.081 | 10.53786 | 2.55469 | 2082.113799 |
| 2 | AFG | 2019 | 0.492 | 63.565 | 10.621292 | 2.69815 | 2112.986381 |
| 3 | AFG | 2020 | 0.488 | 62.575 | 10.705385 | 2.84161 | 1986.803772 |
| 4 | AFG | 2021 | 0.473 | 61.982 | 10.705385 | 2.98507 | 1534.135377 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 1231 | ZZK.WORLD | 2018 | 0.735 | 72.816178 | 12.651182 | 8.483187 | 16431.28555 |
| 1232 | ZZK.WORLD | 2019 | 0.739 | 73.012121 | 12.726141 | 8.574383 | 16713.67643 |
| 1233 | ZZK.WORLD | 2020 | 0.736 | 72.257355 | 12.860674 | 8.686374 | 16015.84583 |
| 1234 | ZZK.WORLD | 2021 | 0.735 | 71.365524 | 12.91184 | 8.682265 | 16837.8418 |
| 1235 | ZZK.WORLD | 2022 | 0.739 | 72.00407 | 12.989766 | 8.661598 | 17254.43404 |
1236 rows × 7 columns
Скачиваем данные для контрольных переменных (уровень безработицы, расходы на образование, расходы на здравоохранение)¶
# Control variable: unemployment rate per country and year.
# Written as a non-mutating chain: `rename` returns a new frame, so the
# recoding below does not write into a column-subset slice of the raw table
# (which is what triggers pandas' SettingWithCopyWarning).
unemployment = (
    pd.read_csv("Unemployment.csv", sep=',')
    .loc[:, ["geo", "TIME_PERIOD", "OBS_VALUE"]]
    .rename(columns={'geo': 'iso2', "OBS_VALUE": "unemployment"})
)
# Recode 'EL' to 'GR' to match the country codes used in the DESI tables.
unemployment['iso2'] = unemployment['iso2'].replace("EL", "GR")
unemployment
| iso2 | TIME_PERIOD | unemployment | |
|---|---|---|---|
| 0 | AT | 2017 | 5.9 |
| 1 | AT | 2018 | 5.2 |
| 2 | AT | 2019 | 4.8 |
| 3 | AT | 2020 | 6.0 |
| 4 | AT | 2021 | 6.2 |
| ... | ... | ... | ... |
| 157 | SK | 2018 | 6.5 |
| 158 | SK | 2019 | 5.7 |
| 159 | SK | 2020 | 6.7 |
| 160 | SK | 2021 | 6.8 |
| 161 | SK | 2022 | 6.1 |
162 rows × 3 columns
# Control variable: education expenditure per country and year.
# Written as a non-mutating chain: `rename` returns a new frame, so the
# recoding below does not write into a column-subset slice of the raw table
# (which is what triggers pandas' SettingWithCopyWarning).
education = (
    pd.read_csv("education.csv", sep=',')
    .loc[:, ["geo", "TIME_PERIOD", "OBS_VALUE"]]
    .rename(columns={'geo': 'iso2', "OBS_VALUE": "education"})
)
# Recode 'EL' to 'GR' to match the country codes used in the DESI tables.
education['iso2'] = education['iso2'].replace("EL", "GR")
education
| iso2 | TIME_PERIOD | education | |
|---|---|---|---|
| 0 | AT | 2017 | 4.8 |
| 1 | AT | 2018 | 4.8 |
| 2 | AT | 2019 | 4.8 |
| 3 | AT | 2020 | 5.1 |
| 4 | AT | 2021 | 4.9 |
| ... | ... | ... | ... |
| 175 | SK | 2018 | 3.9 |
| 176 | SK | 2019 | 4.2 |
| 177 | SK | 2020 | 4.4 |
| 178 | SK | 2021 | 4.3 |
| 179 | SK | 2022 | 4.5 |
180 rows × 3 columns
# Control variable: health expenditure per country and year.
# Written as a non-mutating chain: `rename` returns a new frame, so the
# recoding below does not write into a column-subset slice of the raw table
# (which is what triggers pandas' SettingWithCopyWarning).
health = (
    pd.read_csv("health.csv", sep=',')
    .loc[:, ["geo", "TIME_PERIOD", "OBS_VALUE"]]
    .rename(columns={'geo': 'iso2', "OBS_VALUE": "health"})
)
# Recode 'EL' to 'GR' to match the country codes used in the DESI tables.
health['iso2'] = health['iso2'].replace("EL", "GR")
health
| iso2 | TIME_PERIOD | health | |
|---|---|---|---|
| 0 | AT | 2017 | 8.2 |
| 1 | AT | 2018 | 8.2 |
| 2 | AT | 2019 | 8.3 |
| 3 | AT | 2020 | 9.2 |
| 4 | AT | 2021 | 10.1 |
| ... | ... | ... | ... |
| 175 | SK | 2018 | 5.5 |
| 176 | SK | 2019 | 5.7 |
| 177 | SK | 2020 | 6.1 |
| 178 | SK | 2021 | 6.9 |
| 179 | SK | 2022 | 6.4 |
180 rows × 3 columns
Создаем общий датафрейм, в который входят показатели HDI и I-desi по странам ЕС за 2017-2022¶
# Join the DESI rows with the long-format HDI table on (three-letter code,
# year). A left join keeps every DESI row, including ones with no HDI match.
desi_keys = ['alpha-3', 'period']
hdi_keys = ['iso3', 'year']
desi_hdi_df = pd.merge(desi_1, hdi_1_nonflat, how='left', left_on=desi_keys, right_on=hdi_keys)
desi_hdi_df
| period | country | indicator | breakdown | unit | value | flags | alpha-3 | alpha-2 | name | total_score_desi | iso3 | year | hdi | le | eys | mys | gnipc | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022 | AT | desi | desi_hc | pc_desi | 12.738011 | NaN | AUT | AT | Austria | 54.675671 | AUT | 2022.0 | 0.926 | 82.412 | 16.36746 | 12.305714 | 56529.66329 |
| 1 | 2022 | BE | desi | desi_hc | pc_desi | 12.172748 | NaN | BEL | BE | Belgium | 50.307388 | BEL | 2022.0 | 0.942 | 82.293 | 18.94574 | 12.528578 | 53644.03854 |
| 2 | 2022 | BG | desi | desi_hc | pc_desi | 8.147561 | NaN | BGR | BG | Bulgaria | 37.679882 | BGR | 2022.0 | 0.799 | 71.528 | 13.86803 | 11.41318 | 25920.80375 |
| 3 | 2022 | CY | desi | desi_hc | pc_desi | 10.441309 | NaN | CYP | CY | Cyprus | 48.352205 | CYP | 2022.0 | 0.907 | 81.889 | 16.24309 | 12.44017 | 40136.89453 |
| 4 | 2022 | CZ | desi | desi_hc | pc_desi | 11.397470 | NaN | CZE | CZ | Czechia | 49.143522 | CZE | 2022.0 | 0.895 | 78.129 | 16.347281 | 12.916053 | 39944.66682 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 667 | 2017 | RO | desi | desi_dps | pc_desi | 1.853090 | NaN | ROU | RO | Romania | 19.399117 | ROU | 2017.0 | 0.823 | 75.952 | 14.23019 | 11.14865 | 26557.75639 |
| 668 | 2017 | SE | desi | desi_dps | pc_desi | 14.785592 | NaN | SWE | SE | Sweden | 45.711845 | SWE | 2017.0 | 0.941 | 82.438 | 18.864201 | 12.48605 | 52868.77901 |
| 669 | 2017 | SI | desi | desi_dps | pc_desi | 11.599579 | NaN | SVN | SI | Slovenia | 35.702736 | SVN | 2017.0 | 0.912 | 81.071 | 17.47266 | 12.70029 | 35849.55565 |
| 670 | 2017 | SK | desi | desi_dps | pc_desi | 9.072821 | NaN | SVK | SK | Slovakia | 29.783805 | SVK | 2017.0 | 0.857 | 77.219 | 14.59997 | 12.76618 | 29599.14794 |
| 671 | 2017 | EU | desi | desi_dps | pc_desi | 11.675391 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
672 rows × 18 columns
# Remove the 'EU' aggregate rows, then the helper columns that are no longer
# needed once the tables are joined.
helper_columns = ['unit', 'flags', "alpha-3", "alpha-2", "indicator", "year"]
is_single_country = desi_hdi_df['country'].ne('EU')
desi_hdi_df = desi_hdi_df.loc[is_single_country].drop(columns=helper_columns)
desi_hdi_df
| period | country | breakdown | value | name | total_score_desi | iso3 | hdi | le | eys | mys | gnipc | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022 | AT | desi_hc | 12.738011 | Austria | 54.675671 | AUT | 0.926 | 82.412 | 16.36746 | 12.305714 | 56529.66329 |
| 1 | 2022 | BE | desi_hc | 12.172748 | Belgium | 50.307388 | BEL | 0.942 | 82.293 | 18.94574 | 12.528578 | 53644.03854 |
| 2 | 2022 | BG | desi_hc | 8.147561 | Bulgaria | 37.679882 | BGR | 0.799 | 71.528 | 13.86803 | 11.41318 | 25920.80375 |
| 3 | 2022 | CY | desi_hc | 10.441309 | Cyprus | 48.352205 | CYP | 0.907 | 81.889 | 16.24309 | 12.44017 | 40136.89453 |
| 4 | 2022 | CZ | desi_hc | 11.397470 | Czechia | 49.143522 | CZE | 0.895 | 78.129 | 16.347281 | 12.916053 | 39944.66682 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 666 | 2017 | PT | desi_dps | 12.385897 | Portugal | 35.478987 | PRT | 0.857 | 81.498 | 16.3622 | 9.21823 | 32269.04155 |
| 667 | 2017 | RO | desi_dps | 1.853090 | Romania | 19.399117 | ROU | 0.823 | 75.952 | 14.23019 | 11.14865 | 26557.75639 |
| 668 | 2017 | SE | desi_dps | 14.785592 | Sweden | 45.711845 | SWE | 0.941 | 82.438 | 18.864201 | 12.48605 | 52868.77901 |
| 669 | 2017 | SI | desi_dps | 11.599579 | Slovenia | 35.702736 | SVN | 0.912 | 81.071 | 17.47266 | 12.70029 | 35849.55565 |
| 670 | 2017 | SK | desi_dps | 9.072821 | Slovakia | 29.783805 | SVK | 0.857 | 77.219 | 14.59997 | 12.76618 | 29599.14794 |
648 rows × 12 columns
# The HDI metric columns were filled into None-initialised columns, so they
# are cast to float here before any numeric work.
desi_hdi_df = desi_hdi_df.astype({metric: float for metric in indices_of_interest})
# Which DESI sub-indices are present in the table.
desi_hdi_df['breakdown'].unique()
array(['desi_hc', 'desi_conn', 'desi_idt', 'desi_dps'], dtype=object)
Добавление контроля в датафрейм¶
# Add the unemployment control by (country code, year). The right-hand key
# columns `iso2` and `TIME_PERIOD` stay in the result and are dropped later.
panel_keys = ['country', 'period']
control_keys = ['iso2', 'TIME_PERIOD']
desi_hdi_df = pd.merge(desi_hdi_df, unemployment, how='left', left_on=panel_keys, right_on=control_keys)
desi_hdi_df
| period | country | breakdown | value | name | total_score_desi | iso3 | hdi | le | eys | mys | gnipc | iso2 | TIME_PERIOD | unemployment | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022 | AT | desi_hc | 12.738011 | Austria | 54.675671 | AUT | 0.926 | 82.412 | 16.367460 | 12.305714 | 56529.66329 | AT | 2022 | 4.8 |
| 1 | 2022 | BE | desi_hc | 12.172748 | Belgium | 50.307388 | BEL | 0.942 | 82.293 | 18.945740 | 12.528578 | 53644.03854 | BE | 2022 | 5.6 |
| 2 | 2022 | BG | desi_hc | 8.147561 | Bulgaria | 37.679882 | BGR | 0.799 | 71.528 | 13.868030 | 11.413180 | 25920.80375 | BG | 2022 | 4.2 |
| 3 | 2022 | CY | desi_hc | 10.441309 | Cyprus | 48.352205 | CYP | 0.907 | 81.889 | 16.243090 | 12.440170 | 40136.89453 | CY | 2022 | 6.8 |
| 4 | 2022 | CZ | desi_hc | 11.397470 | Czechia | 49.143522 | CZE | 0.895 | 78.129 | 16.347281 | 12.916053 | 39944.66682 | CZ | 2022 | 2.2 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 643 | 2017 | PT | desi_dps | 12.385897 | Portugal | 35.478987 | PRT | 0.857 | 81.498 | 16.362200 | 9.218230 | 32269.04155 | PT | 2017 | 9.2 |
| 644 | 2017 | RO | desi_dps | 1.853090 | Romania | 19.399117 | ROU | 0.823 | 75.952 | 14.230190 | 11.148650 | 26557.75639 | RO | 2017 | 6.1 |
| 645 | 2017 | SE | desi_dps | 14.785592 | Sweden | 45.711845 | SWE | 0.941 | 82.438 | 18.864201 | 12.486050 | 52868.77901 | SE | 2017 | 6.8 |
| 646 | 2017 | SI | desi_dps | 11.599579 | Slovenia | 35.702736 | SVN | 0.912 | 81.071 | 17.472660 | 12.700290 | 35849.55565 | SI | 2017 | 6.6 |
| 647 | 2017 | SK | desi_dps | 9.072821 | Slovakia | 29.783805 | SVK | 0.857 | 77.219 | 14.599970 | 12.766180 | 29599.14794 | SK | 2017 | 8.1 |
648 rows × 15 columns
# Add the education and health controls by (country code, year).
# Each control's key columns are renamed to the panel's own key names before
# merging, so the join uses shared keys and adds only the value column. This
# avoids depending on pandas' auto-generated `_x` / `_y` suffix columns.
for control in (education, health):
    desi_hdi_df = desi_hdi_df.merge(
        control.rename(columns={'iso2': 'country', 'TIME_PERIOD': 'period'}),
        how='left', on=['country', 'period'])
# `iso2` / `TIME_PERIOD` are the leftover key columns of the earlier
# unemployment merge.
desi_hdi_df = desi_hdi_df.drop(columns=['iso2', 'TIME_PERIOD'])
desi_hdi_df
| period | country | breakdown | value | name | total_score_desi | iso3 | hdi | le | eys | mys | gnipc | unemployment | education | health | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022 | AT | desi_hc | 12.738011 | Austria | 54.675671 | AUT | 0.926 | 82.412 | 16.367460 | 12.305714 | 56529.66329 | 4.8 | 4.8 | 9.3 |
| 1 | 2022 | BE | desi_hc | 12.172748 | Belgium | 50.307388 | BEL | 0.942 | 82.293 | 18.945740 | 12.528578 | 53644.03854 | 5.6 | 6.3 | 8.1 |
| 2 | 2022 | BG | desi_hc | 8.147561 | Bulgaria | 37.679882 | BGR | 0.799 | 71.528 | 13.868030 | 11.413180 | 25920.80375 | 4.2 | 3.9 | 5.6 |
| 3 | 2022 | CY | desi_hc | 10.441309 | Cyprus | 48.352205 | CYP | 0.907 | 81.889 | 16.243090 | 12.440170 | 40136.89453 | 6.8 | 5.1 | 6.2 |
| 4 | 2022 | CZ | desi_hc | 11.397470 | Czechia | 49.143522 | CZE | 0.895 | 78.129 | 16.347281 | 12.916053 | 39944.66682 | 2.2 | 4.9 | 9.1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 643 | 2017 | PT | desi_dps | 12.385897 | Portugal | 35.478987 | PRT | 0.857 | 81.498 | 16.362200 | 9.218230 | 32269.04155 | 9.2 | 4.6 | 6.2 |
| 644 | 2017 | RO | desi_dps | 1.853090 | Romania | 19.399117 | ROU | 0.823 | 75.952 | 14.230190 | 11.148650 | 26557.75639 | 6.1 | 2.9 | 4.4 |
| 645 | 2017 | SE | desi_dps | 14.785592 | Sweden | 45.711845 | SWE | 0.941 | 82.438 | 18.864201 | 12.486050 | 52868.77901 | 6.8 | 6.7 | 6.8 |
| 646 | 2017 | SI | desi_dps | 11.599579 | Slovenia | 35.702736 | SVN | 0.912 | 81.071 | 17.472660 | 12.700290 | 35849.55565 | 6.6 | 5.4 | 6.6 |
| 647 | 2017 | SK | desi_dps | 9.072821 | Slovakia | 29.783805 | SVK | 0.857 | 77.219 | 14.599970 | 12.766180 | 29599.14794 | 8.1 | 3.9 | 5.5 |
648 rows × 15 columns
Разведочный анализ данных с помощью визуализации¶
# Reshape from one row per (country, year, sub-index) to one row per
# (country, year) with one column per sub-index.
subindex_codes = ["desi_hc", "desi_conn", "desi_idt", "desi_dps"]
# For each sub-index: keep its rows and rename `value` to the sub-index code.
subindex_frames = {
    code: desi_hdi_df[desi_hdi_df['breakdown'] == code].rename(columns={'value': code})
    for code in subindex_codes
}
desi_hc = subindex_frames["desi_hc"]
desi_conn = subindex_frames["desi_conn"]
desi_idt = subindex_frames["desi_idt"]
desi_dps = subindex_frames["desi_dps"]

# Start from the desi_hc rows (they carry all shared columns) and attach the
# other three sub-index columns by (period, iso3).
final_df = desi_hc
for code in subindex_codes[1:]:
    final_df = final_df.merge(subindex_frames[code][[code, 'period', 'iso3']], on=['period', 'iso3'])
final_df
| period | country | breakdown | desi_hc | name | total_score_desi | iso3 | hdi | le | eys | mys | gnipc | unemployment | education | health | desi_conn | desi_idt | desi_dps | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022 | AT | desi_hc | 12.738011 | Austria | 54.675671 | AUT | 0.926 | 82.412 | 16.367460 | 12.305714 | 56529.66329 | 4.8 | 4.8 | 9.3 | 14.116365 | 9.791828 | 18.029467 |
| 1 | 2022 | BE | desi_hc | 12.172748 | Belgium | 50.307388 | BEL | 0.942 | 82.293 | 18.945740 | 12.528578 | 53644.03854 | 5.6 | 6.3 | 8.1 | 9.956698 | 11.989702 | 16.188240 |
| 2 | 2022 | BG | desi_hc | 8.147561 | Bulgaria | 37.679882 | BGR | 0.799 | 71.528 | 13.868030 | 11.413180 | 25920.80375 | 4.2 | 3.9 | 5.6 | 12.675763 | 3.882440 | 12.974119 |
| 3 | 2022 | CY | desi_hc | 10.441309 | Cyprus | 48.352205 | CYP | 0.907 | 81.889 | 16.243090 | 12.440170 | 40136.89453 | 6.8 | 5.1 | 6.2 | 14.694059 | 8.836923 | 14.379914 |
| 4 | 2022 | CZ | desi_hc | 11.397470 | Czechia | 49.143522 | CZE | 0.895 | 78.129 | 16.347281 | 12.916053 | 39944.66682 | 2.2 | 4.9 | 9.1 | 13.172476 | 8.459601 | 16.113975 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 157 | 2017 | PT | desi_hc | 9.731173 | Portugal | 35.478987 | PRT | 0.857 | 81.498 | 16.362200 | 9.218230 | 32269.04155 | 9.2 | 4.6 | 6.2 | 6.734946 | 6.626971 | 12.385897 |
| 158 | 2017 | RO | desi_hc | 6.973170 | Romania | 19.399117 | ROU | 0.823 | 75.952 | 14.230190 | 11.148650 | 26557.75639 | 6.1 | 2.9 | 4.4 | 7.893893 | 2.678963 | 1.853090 |
| 159 | 2017 | SE | desi_hc | 13.499455 | Sweden | 45.711845 | SWE | 0.941 | 82.438 | 18.864201 | 12.486050 | 52868.77901 | 6.8 | 6.7 | 6.8 | 8.987967 | 8.438830 | 14.785592 |
| 160 | 2017 | SI | desi_hc | 10.102175 | Slovenia | 35.702736 | SVN | 0.912 | 81.071 | 17.472660 | 12.700290 | 35849.55565 | 6.6 | 5.4 | 6.6 | 7.561989 | 6.438993 | 11.599579 |
| 161 | 2017 | SK | desi_hc | 9.404006 | Slovakia | 29.783805 | SVK | 0.857 | 77.219 | 14.599970 | 12.766180 | 29599.14794 | 8.1 | 3.9 | 5.5 | 6.512537 | 4.794442 | 9.072821 |
162 rows × 18 columns
# One scatter plot of HDI against each DESI sub-index with an OLS trend line;
# each point is labelled with its `country` code.
subindex_titles = {
    'desi_hc': "Human development index and Human capital(I-desi)",
    'desi_conn': "Human development index and Connectivity(I-desi)",
    'desi_idt': "Human development index and Integration of digital technology(I-desi)",
    'desi_dps': "Human development index and Digital public services(I-desi)",
}
for subindex, plot_title in subindex_titles.items():
    fig = px.scatter(final_df, x=subindex, y='hdi', text='country',
                     hover_data=['iso3', 'name', 'period'], trendline="ols",
                     title=plot_title)
    # Fixed canvas size so all four figures are directly comparable.
    fig.update_layout(autosize=False, width=800, height=600)
    fig.show()
Построение модели регрессии между HDI и I-desi¶
# Keep only the identifiers and the variables used in the regressions, in a
# fixed order: identifiers, overall DESI score, HDI metrics, DESI sub-indices,
# controls.
id_columns = ['period', 'name', 'iso3']
hdi_columns = ['hdi', 'le', 'eys', 'mys', 'gnipc']
desi_columns = ['desi_conn', 'desi_idt', 'desi_dps', 'desi_hc']
control_columns = ['unemployment', 'education', 'health']
final_df = final_df[id_columns + ['total_score_desi'] + hdi_columns + desi_columns + control_columns]
final_df
| period | name | iso3 | total_score_desi | hdi | le | eys | mys | gnipc | desi_conn | desi_idt | desi_dps | desi_hc | unemployment | education | health | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022 | Austria | AUT | 54.675671 | 0.926 | 82.412 | 16.367460 | 12.305714 | 56529.66329 | 14.116365 | 9.791828 | 18.029467 | 12.738011 | 4.8 | 4.8 | 9.3 |
| 1 | 2022 | Belgium | BEL | 50.307388 | 0.942 | 82.293 | 18.945740 | 12.528578 | 53644.03854 | 9.956698 | 11.989702 | 16.188240 | 12.172748 | 5.6 | 6.3 | 8.1 |
| 2 | 2022 | Bulgaria | BGR | 37.679882 | 0.799 | 71.528 | 13.868030 | 11.413180 | 25920.80375 | 12.675763 | 3.882440 | 12.974119 | 8.147561 | 4.2 | 3.9 | 5.6 |
| 3 | 2022 | Cyprus | CYP | 48.352205 | 0.907 | 81.889 | 16.243090 | 12.440170 | 40136.89453 | 14.694059 | 8.836923 | 14.379914 | 10.441309 | 6.8 | 5.1 | 6.2 |
| 4 | 2022 | Czechia | CZE | 49.143522 | 0.895 | 78.129 | 16.347281 | 12.916053 | 39944.66682 | 13.172476 | 8.459601 | 16.113975 | 11.397470 | 2.2 | 4.9 | 9.1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 157 | 2017 | Portugal | PRT | 35.478987 | 0.857 | 81.498 | 16.362200 | 9.218230 | 32269.04155 | 6.734946 | 6.626971 | 12.385897 | 9.731173 | 9.2 | 4.6 | 6.2 |
| 158 | 2017 | Romania | ROU | 19.399117 | 0.823 | 75.952 | 14.230190 | 11.148650 | 26557.75639 | 7.893893 | 2.678963 | 1.853090 | 6.973170 | 6.1 | 2.9 | 4.4 |
| 159 | 2017 | Sweden | SWE | 45.711845 | 0.941 | 82.438 | 18.864201 | 12.486050 | 52868.77901 | 8.987967 | 8.438830 | 14.785592 | 13.499455 | 6.8 | 6.7 | 6.8 |
| 160 | 2017 | Slovenia | SVN | 35.702736 | 0.912 | 81.071 | 17.472660 | 12.700290 | 35849.55565 | 7.561989 | 6.438993 | 11.599579 | 10.102175 | 6.6 | 5.4 | 6.6 |
| 161 | 2017 | Slovakia | SVK | 29.783805 | 0.857 | 77.219 | 14.599970 | 12.766180 | 29599.14794 | 6.512537 | 4.794442 | 9.072821 | 9.404006 | 8.1 | 3.9 | 5.5 |
162 rows × 16 columns
Наш получившийся датафрейм final_df охватывает период значений индексов за 2017-2022 годы и состоит из:
- name, iso3 - 27 стран ЕС
- измерения индекса цифровизации I-desi по 4 субиндексам (каждый весит 25%): Human Capital, Connectivity, Integration of Digital Technology, Digital Public Services:
- desi_hc субиндекс Human capital
- desi_conn субиндекс Connectivity
- desi_idt субиндекс Integration of digital technology
- desi_dps субиндекс Digital public services
- total_score_desi - общий индекс I-desi стран за период
В нашем исследовании общий показатель I-desi и каждый из 4 субиндексов в отдельности будут независимыми переменными.
Каждый из субиндексов измеряется по нескольким параметрам. Но в нашем регрессионном анализе мы не будем уходить на такой уровень детализации.
Все значения нормированы по минимально-максимальному подходу: минимальный фактический показатель умножается на 0.75, максимальный фактический показатель умножается на 1.75. Общее значение шкалы от 0 до 100, где 100 -- лучшее значение.
- hdi - обобщенный усредненный показатель ИЧР за каждый год периода (с 2017 по 2022)
- le - ожидаемая продолжительность жизни
- eys - ожидаемое количество лет обучения
- mys- среднее количество лет обучения
- gnipc - ВНД на душу населения (в ценах 2017 года)
Субиндексы HDI в нашем исследовании будут зависимыми переменными.
Контрольные переменные:
- unemployment- уровень безработицы в %
- education - гос.расходы на образование в % от всех расходов
- health - гос.расходы на здравоохранение в % от всех расходов
Так как данные панельные, то мы будем использовать модель OLS для панельных данных
# Panel layout: rows ordered by country name and year, with that pair moved
# into a two-level (name, period) index.
panel_index_columns = ['name', 'period']
final_df_index = final_df.sort_values(panel_index_columns).set_index(panel_index_columns)
Регрессии всех показателей цифровизации на каждый отдельный субиндекс качества жизни¶
# Pairwise correlations between all numeric model variables, drawn as an
# annotated heatmap.
correlated_columns = [
    'total_score_desi', 'hdi', 'le', 'eys', 'mys', 'gnipc',
    'desi_conn', 'desi_idt', 'desi_dps', 'desi_hc',
    'unemployment', 'education', 'health',
]
corr = final_df.loc[:, correlated_columns].corr()
sns.set_theme(rc={'figure.figsize': (11.7, 8.27)})
sns.heatmap(corr, annot=True)
<Axes: >
Регрессии с дамми переменными¶
# OLS of HDI on the overall DESI score and the three controls, with country
# dummies (Finland as the reference category) and year dummies.
dummy_formula = 'hdi ~ total_score_desi + health + unemployment + education + C(name, Treatment("Finland")) + C(period)'
model00 = statf.ols(formula=dummy_formula, data=final_df).fit()
print(model00.summary())
OLS Regression Results
==============================================================================
Dep. Variable: hdi R-squared: 0.994
Model: OLS Adj. R-squared: 0.993
Method: Least Squares F-statistic: 614.9
Date: Thu, 13 Jun 2024 Prob (F-statistic): 6.67e-125
Time: 10:31:29 Log-Likelihood: 719.87
No. Observations: 162 AIC: -1368.
Df Residuals: 126 BIC: -1257.
Df Model: 35
Covariance Type: nonrobust
================================================================================================================
coef std err t P>|t| [0.025 0.975]
----------------------------------------------------------------------------------------------------------------
Intercept 0.9164 0.015 59.230 0.000 0.886 0.947
C(name, Treatment("Finland"))[T.Austria] -0.0132 0.004 -3.291 0.001 -0.021 -0.005
C(name, Treatment("Finland"))[T.Belgium] 0.0091 0.004 2.522 0.013 0.002 0.016
C(name, Treatment("Finland"))[T.Bulgaria] -0.1186 0.007 -16.176 0.000 -0.133 -0.104
C(name, Treatment("Finland"))[T.Croatia] -0.0612 0.005 -12.437 0.000 -0.071 -0.051
C(name, Treatment("Finland"))[T.Cyprus] -0.0237 0.005 -4.350 0.000 -0.034 -0.013
C(name, Treatment("Finland"))[T.Czechia] -0.0361 0.005 -7.129 0.000 -0.046 -0.026
C(name, Treatment("Finland"))[T.Denmark] 0.0088 0.002 3.976 0.000 0.004 0.013
C(name, Treatment("Finland"))[T.Estonia] -0.0396 0.003 -13.185 0.000 -0.046 -0.034
C(name, Treatment("Finland"))[T.France] -0.0238 0.004 -5.921 0.000 -0.032 -0.016
C(name, Treatment("Finland"))[T.Germany] 0.0156 0.005 3.263 0.001 0.006 0.025
C(name, Treatment("Finland"))[T.Greece] -0.0302 0.008 -3.763 0.000 -0.046 -0.014
C(name, Treatment("Finland"))[T.Hungary] -0.0755 0.006 -12.933 0.000 -0.087 -0.064
C(name, Treatment("Finland"))[T.Ireland] 0.0005 0.005 0.107 0.915 -0.009 0.010
C(name, Treatment("Finland"))[T.Italy] -0.0306 0.006 -5.243 0.000 -0.042 -0.019
C(name, Treatment("Finland"))[T.Latvia] -0.0569 0.004 -13.713 0.000 -0.065 -0.049
C(name, Treatment("Finland"))[T.Lithuania] -0.0511 0.004 -12.435 0.000 -0.059 -0.043
C(name, Treatment("Finland"))[T.Luxembourg] -0.0137 0.003 -4.252 0.000 -0.020 -0.007
C(name, Treatment("Finland"))[T.Malta] -0.0306 0.003 -9.753 0.000 -0.037 -0.024
C(name, Treatment("Finland"))[T.Netherlands] 0.0006 0.002 0.256 0.798 -0.004 0.005
C(name, Treatment("Finland"))[T.Poland] -0.0461 0.006 -7.234 0.000 -0.059 -0.033
C(name, Treatment("Finland"))[T.Portugal] -0.0676 0.004 -15.648 0.000 -0.076 -0.059
C(name, Treatment("Finland"))[T.Romania] -0.0937 0.009 -10.636 0.000 -0.111 -0.076
C(name, Treatment("Finland"))[T.Slovakia] -0.0691 0.006 -11.885 0.000 -0.081 -0.058
C(name, Treatment("Finland"))[T.Slovenia] -0.0128 0.004 -3.471 0.001 -0.020 -0.005
C(name, Treatment("Finland"))[T.Spain] -0.0324 0.004 -7.538 0.000 -0.041 -0.024
C(name, Treatment("Finland"))[T.Sweden] 0.0131 0.003 5.192 0.000 0.008 0.018
C(period)[T.2018] 0.0012 0.001 1.131 0.260 -0.001 0.003
C(period)[T.2019] 0.0029 0.002 1.896 0.060 -0.000 0.006
C(period)[T.2020] -0.0025 0.002 -1.113 0.268 -0.007 0.002
C(period)[T.2021] -0.0047 0.003 -1.577 0.117 -0.011 0.001
C(period)[T.2022] -0.0042 0.004 -1.076 0.284 -0.012 0.004
total_score_desi 0.0008 0.000 3.585 0.000 0.000 0.001
health 0.0001 0.001 0.179 0.858 -0.001 0.002
unemployment -0.0004 0.000 -1.191 0.236 -0.001 0.000
education -0.0032 0.002 -2.137 0.035 -0.006 -0.000
==============================================================================
Omnibus: 2.482 Durbin-Watson: 2.583
Prob(Omnibus): 0.289 Jarque-Bera (JB): 2.320
Skew: 0.089 Prob(JB): 0.313
Kurtosis: 3.558 Cond. No. 4.87e+03
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 4.87e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
# Same dummy-variable OLS, but the aggregate DESI score is replaced by its
# four components (desi_conn, desi_idt, desi_dps, desi_hc).
model01 = statf.ols(
    'hdi ~ desi_conn + desi_idt + desi_dps + desi_hc + health + unemployment + education + C(name, Treatment("Finland")) + C(period)',
    data=final_df,
).fit()
print(model01.summary())
OLS Regression Results
==============================================================================
Dep. Variable: hdi R-squared: 0.995
Model: OLS Adj. R-squared: 0.993
Method: Least Squares F-statistic: 641.5
Date: Thu, 13 Jun 2024 Prob (F-statistic): 1.12e-124
Time: 10:31:29 Log-Likelihood: 731.85
No. Observations: 162 AIC: -1386.
Df Residuals: 123 BIC: -1265.
Df Model: 38
Covariance Type: nonrobust
================================================================================================================
coef std err t P>|t| [0.025 0.975]
----------------------------------------------------------------------------------------------------------------
Intercept 0.9280 0.026 35.164 0.000 0.876 0.980
C(name, Treatment("Finland"))[T.Austria] -0.0137 0.006 -2.223 0.028 -0.026 -0.002
C(name, Treatment("Finland"))[T.Belgium] 0.0004 0.008 0.053 0.958 -0.015 0.016
C(name, Treatment("Finland"))[T.Bulgaria] -0.1167 0.013 -8.712 0.000 -0.143 -0.090
C(name, Treatment("Finland"))[T.Croatia] -0.0619 0.009 -6.723 0.000 -0.080 -0.044
C(name, Treatment("Finland"))[T.Cyprus] -0.0280 0.011 -2.596 0.011 -0.049 -0.007
C(name, Treatment("Finland"))[T.Czechia] -0.0403 0.009 -4.617 0.000 -0.058 -0.023
C(name, Treatment("Finland"))[T.Denmark] 0.0052 0.004 1.378 0.171 -0.002 0.013
C(name, Treatment("Finland"))[T.Estonia] -0.0368 0.007 -5.595 0.000 -0.050 -0.024
C(name, Treatment("Finland"))[T.France] -0.0217 0.007 -3.004 0.003 -0.036 -0.007
C(name, Treatment("Finland"))[T.Germany] 0.0140 0.009 1.602 0.112 -0.003 0.031
C(name, Treatment("Finland"))[T.Greece] -0.0285 0.015 -1.955 0.053 -0.057 0.000
C(name, Treatment("Finland"))[T.Hungary] -0.0736 0.011 -6.650 0.000 -0.096 -0.052
C(name, Treatment("Finland"))[T.Ireland] 0.0021 0.005 0.392 0.696 -0.009 0.013
C(name, Treatment("Finland"))[T.Italy] -0.0357 0.012 -3.008 0.003 -0.059 -0.012
C(name, Treatment("Finland"))[T.Latvia] -0.0507 0.009 -5.711 0.000 -0.068 -0.033
C(name, Treatment("Finland"))[T.Lithuania] -0.0543 0.009 -5.833 0.000 -0.073 -0.036
C(name, Treatment("Finland"))[T.Luxembourg] -0.0074 0.006 -1.320 0.189 -0.018 0.004
C(name, Treatment("Finland"))[T.Malta] -0.0328 0.006 -5.596 0.000 -0.044 -0.021
C(name, Treatment("Finland"))[T.Netherlands] 0.0001 0.003 0.043 0.966 -0.006 0.007
C(name, Treatment("Finland"))[T.Poland] -0.0475 0.012 -3.970 0.000 -0.071 -0.024
C(name, Treatment("Finland"))[T.Portugal] -0.0703 0.009 -8.025 0.000 -0.088 -0.053
C(name, Treatment("Finland"))[T.Romania] -0.0939 0.018 -5.110 0.000 -0.130 -0.058
C(name, Treatment("Finland"))[T.Slovakia] -0.0693 0.011 -6.427 0.000 -0.091 -0.048
C(name, Treatment("Finland"))[T.Slovenia] -0.0173 0.008 -2.111 0.037 -0.034 -0.001
C(name, Treatment("Finland"))[T.Spain] -0.0258 0.007 -3.614 0.000 -0.040 -0.012
C(name, Treatment("Finland"))[T.Sweden] 0.0109 0.004 2.957 0.004 0.004 0.018
C(period)[T.2018] 0.0003 0.001 0.203 0.839 -0.002 0.003
C(period)[T.2019] 0.0017 0.002 0.782 0.436 -0.003 0.006
C(period)[T.2020] -0.0039 0.003 -1.204 0.231 -0.010 0.002
C(period)[T.2021] -0.0057 0.004 -1.314 0.191 -0.014 0.003
C(period)[T.2022] -0.0040 0.005 -0.728 0.468 -0.015 0.007
desi_conn 2.348e-05 0.000 0.085 0.933 -0.001 0.001
desi_idt 0.0031 0.001 5.149 0.000 0.002 0.004
desi_dps 0.0004 0.001 0.367 0.714 -0.002 0.002
desi_hc -0.0008 0.001 -0.690 0.491 -0.003 0.002
health 0.0004 0.001 0.490 0.625 -0.001 0.002
unemployment -0.0008 0.000 -2.338 0.021 -0.002 -0.000
education -0.0024 0.001 -1.644 0.103 -0.005 0.000
==============================================================================
Omnibus: 1.147 Durbin-Watson: 2.449
Prob(Omnibus): 0.564 Jarque-Bera (JB): 0.762
Skew: -0.061 Prob(JB): 0.683
Kurtosis: 3.313 Cond. No. 5.42e+03
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 5.42e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
Регрессии с фиксированными эффектами на панельных данных
Также после каждой модели проверим качество оценок её коэффициентов по подгруппам стран, чтобы исключить смещение.
# Panel regression of hdi on the aggregate DESI score and controls with both
# entity and time effects; covariance estimator left at the library default.
model0 = PanelOLS.from_formula(
    'hdi ~ total_score_desi + EntityEffects + TimeEffects + health + education + unemployment',
    data=final_df_index,
).fit()
print(model0)
PanelOLS Estimation Summary
================================================================================
Dep. Variable: hdi R-squared: 0.1614
Estimator: PanelOLS R-squared (Between): 0.0328
No. Observations: 162 R-squared (Within): 0.1907
Date: Thu, Jun 13 2024 R-squared (Overall): 0.0328
Time: 10:31:30 Log-likelihood 719.87
Cov. Estimator: Unadjusted
F-statistic: 6.0607
Entities: 27 P-value 0.0002
Avg Obs: 6.0000 Distribution: F(4,126)
Min Obs: 6.0000
Max Obs: 6.0000 F-statistic (robust): 6.0607
P-value 0.0002
Time periods: 6 Distribution: F(4,126)
Avg Obs: 27.000
Min Obs: 27.000
Max Obs: 27.000
Parameter Estimates
====================================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
------------------------------------------------------------------------------------
total_score_desi 0.0008 0.0002 3.5847 0.0005 0.0003 0.0012
health 0.0001 0.0007 0.1792 0.8581 -0.0013 0.0016
education -0.0032 0.0015 -2.1366 0.0346 -0.0062 -0.0002
unemployment -0.0004 0.0003 -1.1907 0.2360 -0.0011 0.0003
====================================================================================
F-test for Poolability: 346.67
P-value: 0.0000
Distribution: F(31,126)
Included effects: Entity, Time
# Same specification as model0, but standard errors are clustered by entity.
model0_robust_entity = PanelOLS.from_formula(
    'hdi ~ total_score_desi + EntityEffects + TimeEffects + health + education + unemployment',
    data=final_df_index,
).fit(cov_type="clustered", cluster_entity=True)
print(model0_robust_entity)
PanelOLS Estimation Summary
================================================================================
Dep. Variable: hdi R-squared: 0.1614
Estimator: PanelOLS R-squared (Between): 0.0328
No. Observations: 162 R-squared (Within): 0.1907
Date: Thu, Jun 13 2024 R-squared (Overall): 0.0328
Time: 10:31:30 Log-likelihood 719.87
Cov. Estimator: Clustered
F-statistic: 6.0607
Entities: 27 P-value 0.0002
Avg Obs: 6.0000 Distribution: F(4,126)
Min Obs: 6.0000
Max Obs: 6.0000 F-statistic (robust): 2.3205
P-value 0.0604
Time periods: 6 Distribution: F(4,126)
Avg Obs: 27.000
Min Obs: 27.000
Max Obs: 27.000
Parameter Estimates
====================================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
------------------------------------------------------------------------------------
total_score_desi 0.0008 0.0004 2.0660 0.0409 3.207e-05 0.0015
health 0.0001 0.0009 0.1393 0.8895 -0.0017 0.0020
education -0.0032 0.0025 -1.2954 0.1975 -0.0081 0.0017
unemployment -0.0004 0.0003 -1.2752 0.2046 -0.0010 0.0002
====================================================================================
F-test for Poolability: 346.67
P-value: 0.0000
Distribution: F(31,126)
Included effects: Entity, Time
# Same specification as model0, with standard errors clustered by both
# entity and time period.
model0_robust_entity_time = PanelOLS.from_formula(
    'hdi ~ total_score_desi + EntityEffects + TimeEffects + health + education + unemployment',
    data=final_df_index,
).fit(cov_type="clustered", cluster_entity=True, cluster_time=True)
print(model0_robust_entity_time)
PanelOLS Estimation Summary
================================================================================
Dep. Variable: hdi R-squared: 0.1614
Estimator: PanelOLS R-squared (Between): 0.0328
No. Observations: 162 R-squared (Within): 0.1907
Date: Thu, Jun 13 2024 R-squared (Overall): 0.0328
Time: 10:31:31 Log-likelihood 719.87
Cov. Estimator: Clustered
F-statistic: 6.0607
Entities: 27 P-value 0.0002
Avg Obs: 6.0000 Distribution: F(4,126)
Min Obs: 6.0000
Max Obs: 6.0000 F-statistic (robust): 4.3415
P-value 0.0025
Time periods: 6 Distribution: F(4,126)
Avg Obs: 27.000
Min Obs: 27.000
Max Obs: 27.000
Parameter Estimates
====================================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
------------------------------------------------------------------------------------
total_score_desi 0.0008 0.0003 2.3938 0.0181 0.0001 0.0014
health 0.0001 0.0010 0.1293 0.8973 -0.0019 0.0022
education -0.0032 0.0021 -1.5107 0.1334 -0.0074 0.0010
unemployment -0.0004 0.0003 -1.6024 0.1116 -0.0009 9.625e-05
====================================================================================
F-test for Poolability: 346.67
P-value: 0.0000
Distribution: F(31,126)
Included effects: Entity, Time
# Side-by-side table of the three covariance variants of model0;
# precision='pvalues' puts p-values in the parentheses under each coefficient.
print(
    compare(
        {
            "FE-model": model0,
            "Robust-model(entity)": model0_robust_entity,
            "Robust-model(entity_time)": model0_robust_entity_time,
        },
        precision='pvalues',
    )
)
Model Comparison
=====================================================================================
FE-model Robust-model(entity) Robust-model(entity_time)
-------------------------------------------------------------------------------------
Dep. Variable hdi hdi hdi
Estimator PanelOLS PanelOLS PanelOLS
No. Observations 162 162 162
Cov. Est. Unadjusted Clustered Clustered
R-squared 0.1614 0.1614 0.1614
R-Squared (Within) 0.1907 0.1907 0.1907
R-Squared (Between) 0.0328 0.0328 0.0328
R-Squared (Overall) 0.0328 0.0328 0.0328
F-statistic 6.0607 6.0607 6.0607
P-value (F-stat) 0.0002 0.0002 0.0002
===================== ============ =========== ===========
total_score_desi 0.0008 0.0008 0.0008
(0.0005) (0.0409) (0.0181)
health 0.0001 0.0001 0.0001
(0.8581) (0.8895) (0.8973)
education -0.0032 -0.0032 -0.0032
(0.0346) (0.1975) (0.1334)
unemployment -0.0004 -0.0004 -0.0004
(0.2360) (0.2046) (0.1116)
======================= ============== ============= =============
Effects Entity Entity Entity
Time Time Time
-------------------------------------------------------------------------------------
P-values reported in parentheses
def calc_weightedsum1(check_df):
    """Return the variance-weighted average of per-group slopes of y_resid on x_resid.

    For every group in ``check_df['name']`` the slope of the simple regression
    ``y_resid ~ x_resid`` (with intercept) is computed as
    ``S_xy / S_xx``, where ``S_xx`` and ``S_xy`` are the within-group sums of
    squared deviations and cross-products. Each slope is weighted by its
    group's share of the total ``S_xx``, and the weighted slopes are summed.

    Groups whose ``x_resid`` has no within-group variation (a single row or a
    constant value) have an undefined slope and zero weight; they are skipped
    so that they cannot turn the whole result into NaN.

    Parameters
    ----------
    check_df : pandas.DataFrame
        Must contain the columns ``name``, ``x_resid`` and ``y_resid``.

    Returns
    -------
    float
        The weighted sum of slopes; NaN when no group has variation in
        ``x_resid``. The value is also printed with the prefix
        ``Manual calculation:``.
    """
    frame = check_df[['name', 'x_resid', 'y_resid']].copy()

    # Centre both residual columns on their group means.
    group_means = frame.groupby('name', sort=False)[['x_resid', 'y_resid']].transform('mean')
    x_dev = frame['x_resid'] - group_means['x_resid']
    y_dev = frame['y_resid'] - group_means['y_resid']
    frame['ss_x'] = x_dev ** 2
    frame['s_xy'] = x_dev * y_dev

    # Within-group sums, indexed by group name so later arithmetic is aligned
    # by label rather than by position.
    sums = frame.groupby('name', sort=False)[['ss_x', 's_xy']].sum()
    informative = sums[sums['ss_x'] > 0]

    slopes = informative['s_xy'] / informative['ss_x']
    weights = informative['ss_x'] / informative['ss_x'].sum()

    # min_count=1 yields NaN instead of 0.0 when there is nothing to average.
    weightedsum1 = (slopes * weights).sum(min_count=1)
    print('Manual calculation:', weightedsum1)
    return weightedsum1
def check_groups(model, var_to_check, target, other_vars=None, data=None):
    """Check a fitted coefficient against a per-country weighted average of slopes.

    ``target`` and ``var_to_check`` are each regressed (OLS) on health,
    unemployment, education, country dummies (Finland as reference), period
    dummies and any ``other_vars``. The two residual series are passed to
    ``calc_weightedsum1``, and the result is compared with
    ``model.params[var_to_check]``.

    Parameters
    ----------
    model : fitted model
        Any object exposing ``params`` indexable by variable name.
    var_to_check : str
        Name of the regressor whose coefficient is verified.
    target : str
        Name of the dependent variable.
    other_vars : iterable of str, optional
        Additional regressors to partial out of both ``target`` and
        ``var_to_check``.
    data : pandas.DataFrame, optional
        Frame holding all referenced columns plus ``name`` and ``period``.
        Defaults to the module-level ``final_df``.

    Raises
    ------
    AssertionError
        If the manual value and the model coefficient are not close
        (``np.allclose``). Raised explicitly so the check also runs
        under ``python -O``.
    """
    if data is None:
        data = final_df

    rhs = 'health + unemployment + education + C(name, Treatment("Finland")) + C(period)'
    rhs += ''.join(f' + {var}' for var in (other_vars or ()))
    formula_target = f'{target} ~ {rhs}'
    formula_var_x = f'{var_to_check} ~ {rhs}'

    # Explicit copy: new columns are added below and must not touch `data`.
    check_df = data.loc[:, ['name', 'period']].copy()
    check_df['y_resid'] = statf.ols(formula=formula_target, data=data).fit().resid
    check_df['x_resid'] = statf.ols(formula=formula_var_x, data=data).fit().resid

    # ANSI escape codes: print the variable name in bold.
    print('\033[1m', var_to_check, '\033[0m')
    weightedsum1 = calc_weightedsum1(check_df)
    model_coef = model.params[var_to_check]
    print('Model coeff:', model_coef)
    if not np.allclose(weightedsum1, model_coef):
        raise AssertionError('Coefficients mismatch')
# Verify the total_score_desi coefficient of model0 against the per-country
# weighted average of slopes.
var_to_check, target = 'total_score_desi', 'hdi'
check_groups(model0, var_to_check, target)
total_score_desi
Manual calculation: 0.0007616990949393254
Model coeff: 0.0007616990949393253
# Panel regression of hdi on the four DESI components and controls with both
# entity and time effects; covariance estimator left at the library default.
model1 = PanelOLS.from_formula(
    'hdi ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
).fit()
print(model1)
PanelOLS Estimation Summary
================================================================================
Dep. Variable: hdi R-squared: 0.2767
Estimator: PanelOLS R-squared (Between): 0.0101
No. Observations: 162 R-squared (Within): 0.2732
Date: Thu, Jun 13 2024 R-squared (Overall): 0.0101
Time: 10:31:31 Log-likelihood 731.85
Cov. Estimator: Unadjusted
F-statistic: 6.7219
Entities: 27 P-value 0.0000
Avg Obs: 6.0000 Distribution: F(7,123)
Min Obs: 6.0000
Max Obs: 6.0000 F-statistic (robust): 6.7219
P-value 0.0000
Time periods: 6 Distribution: F(7,123)
Avg Obs: 27.000
Min Obs: 27.000
Max Obs: 27.000
Parameter Estimates
================================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
--------------------------------------------------------------------------------
desi_conn 2.348e-05 0.0003 0.0845 0.9328 -0.0005 0.0006
desi_idt 0.0031 0.0006 5.1490 0.0000 0.0019 0.0043
desi_dps 0.0004 0.0010 0.3675 0.7139 -0.0017 0.0025
desi_hc -0.0008 0.0012 -0.6903 0.4913 -0.0032 0.0016
health 0.0004 0.0007 0.4897 0.6252 -0.0011 0.0018
unemployment -0.0008 0.0003 -2.3376 0.0210 -0.0015 -0.0001
education -0.0024 0.0014 -1.6437 0.1028 -0.0052 0.0005
================================================================================
F-test for Poolability: 261.43
P-value: 0.0000
Distribution: F(31,123)
Included effects: Entity, Time
# Same specification as model1, but standard errors are clustered by entity.
model1_robust_entity = PanelOLS.from_formula(
    'hdi ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
).fit(cov_type="clustered", cluster_entity=True)
print(model1_robust_entity)
PanelOLS Estimation Summary
================================================================================
Dep. Variable: hdi R-squared: 0.2767
Estimator: PanelOLS R-squared (Between): 0.0101
No. Observations: 162 R-squared (Within): 0.2732
Date: Thu, Jun 13 2024 R-squared (Overall): 0.0101
Time: 10:31:32 Log-likelihood 731.85
Cov. Estimator: Clustered
F-statistic: 6.7219
Entities: 27 P-value 0.0000
Avg Obs: 6.0000 Distribution: F(7,123)
Min Obs: 6.0000
Max Obs: 6.0000 F-statistic (robust): 3.0158
P-value 0.0059
Time periods: 6 Distribution: F(7,123)
Avg Obs: 27.000
Min Obs: 27.000
Max Obs: 27.000
Parameter Estimates
================================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
--------------------------------------------------------------------------------
desi_conn 2.348e-05 0.0003 0.0698 0.9445 -0.0006 0.0007
desi_idt 0.0031 0.0011 2.8151 0.0057 0.0009 0.0053
desi_dps 0.0004 0.0016 0.2373 0.8128 -0.0028 0.0036
desi_hc -0.0008 0.0019 -0.4473 0.6555 -0.0045 0.0028
health 0.0004 0.0012 0.2998 0.7648 -0.0020 0.0027
unemployment -0.0008 0.0003 -2.8755 0.0048 -0.0014 -0.0003
education -0.0024 0.0023 -1.0481 0.2967 -0.0068 0.0021
================================================================================
F-test for Poolability: 261.43
P-value: 0.0000
Distribution: F(31,123)
Included effects: Entity, Time
# Same specification as model1, with standard errors clustered by both
# entity and time period.
model1_robust_entity_time = PanelOLS.from_formula(
    'hdi ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
).fit(cov_type="clustered", cluster_entity=True, cluster_time=True)
print(model1_robust_entity_time)
PanelOLS Estimation Summary
================================================================================
Dep. Variable: hdi R-squared: 0.2767
Estimator: PanelOLS R-squared (Between): 0.0101
No. Observations: 162 R-squared (Within): 0.2732
Date: Thu, Jun 13 2024 R-squared (Overall): 0.0101
Time: 10:31:32 Log-likelihood 731.85
Cov. Estimator: Clustered
F-statistic: 6.7219
Entities: 27 P-value 0.0000
Avg Obs: 6.0000 Distribution: F(7,123)
Min Obs: 6.0000
Max Obs: 6.0000 F-statistic (robust): 0.5717
P-value 0.7778
Time periods: 6 Distribution: F(7,123)
Avg Obs: 27.000
Min Obs: 27.000
Max Obs: 27.000
Parameter Estimates
================================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
--------------------------------------------------------------------------------
desi_conn 2.348e-05 0.0003 0.0802 0.9362 -0.0006 0.0006
desi_idt 0.0031 0.0010 3.1935 0.0018 0.0012 0.0050
desi_dps 0.0004 0.0021 0.1874 0.8517 -0.0037 0.0045
desi_hc -0.0008 0.0017 -0.4807 0.6316 -0.0043 0.0026
health 0.0004 0.0013 0.2697 0.7878 -0.0022 0.0029
unemployment -0.0008 0.0002 -3.8722 0.0002 -0.0012 -0.0004
education -0.0024 0.0018 -1.3422 0.1820 -0.0058 0.0011
================================================================================
F-test for Poolability: 261.43
P-value: 0.0000
Distribution: F(31,123)
Included effects: Entity, Time
# Side-by-side table of the three covariance variants of model1;
# precision='pvalues' puts p-values in the parentheses under each coefficient.
print(
    compare(
        {
            "FE-model": model1,
            "Robust-model(entity)": model1_robust_entity,
            "Robust-model(entity_time)": model1_robust_entity_time,
        },
        precision='pvalues',
    )
)
Model Comparison
======================================================================================
FE-model Robust-model(entity) Robust-model(entity_time)
--------------------------------------------------------------------------------------
Dep. Variable hdi hdi hdi
Estimator PanelOLS PanelOLS PanelOLS
No. Observations 162 162 162
Cov. Est. Unadjusted Clustered Clustered
R-squared 0.2767 0.2767 0.2767
R-Squared (Within) 0.2732 0.2732 0.2732
R-Squared (Between) 0.0101 0.0101 0.0101
R-Squared (Overall) 0.0101 0.0101 0.0101
F-statistic 6.7219 6.7219 6.7219
P-value (F-stat) 0.0000 0.0000 0.0000
===================== ============= =========== ===========
desi_conn 2.348e-05 2.348e-05 2.348e-05
(0.9328) (0.9445) (0.9362)
desi_idt 0.0031 0.0031 0.0031
(1.008e-06) (0.0057) (0.0018)
desi_dps 0.0004 0.0004 0.0004
(0.7139) (0.8128) (0.8517)
desi_hc -0.0008 -0.0008 -0.0008
(0.4913) (0.6555) (0.6316)
health 0.0004 0.0004 0.0004
(0.6252) (0.7648) (0.7878)
unemployment -0.0008 -0.0008 -0.0008
(0.0210) (0.0048) (0.0002)
education -0.0024 -0.0024 -0.0024
(0.1028) (0.2967) (0.1820)
======================= =============== ============= =============
Effects Entity Entity Entity
Time Time Time
--------------------------------------------------------------------------------------
P-values reported in parentheses
# Verify each DESI-component coefficient of model1 in turn; the remaining
# three components are passed as extra regressors to partial out.
target = 'hdi'
for var_to_check, other_vars in (
    ('desi_conn', ['desi_idt', 'desi_dps', 'desi_hc']),
    ('desi_idt', ['desi_conn', 'desi_dps', 'desi_hc']),
    ('desi_dps', ['desi_conn', 'desi_idt', 'desi_hc']),
    ('desi_hc', ['desi_conn', 'desi_idt', 'desi_dps']),
):
    check_groups(model1, var_to_check, target, other_vars)
desi_conn Manual calculation: 2.3478822678223248e-05 Model coeff: 2.3478822678212494e-05 desi_idt Manual calculation: 0.003099960566821051 Model coeff: 0.0030999605668210555 desi_dps Manual calculation: 0.00038499008158690855 Model coeff: 0.00038499008158690573 desi_hc Manual calculation: -0.0008308973659444528 Model coeff: -0.0008308973659444756
# Panel regression of `le` on the four DESI components and controls with both
# entity and time effects; covariance estimator left at the library default.
model2 = PanelOLS.from_formula(
    'le ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
).fit()
print(model2)
PanelOLS Estimation Summary
================================================================================
Dep. Variable: le R-squared: 0.4150
Estimator: PanelOLS R-squared (Between): 0.1541
No. Observations: 162 R-squared (Within): -5.2924
Date: Thu, Jun 13 2024 R-squared (Overall): 0.1538
Time: 10:31:34 Log-likelihood -78.789
Cov. Estimator: Unadjusted
F-statistic: 12.463
Entities: 27 P-value 0.0000
Avg Obs: 6.0000 Distribution: F(7,123)
Min Obs: 6.0000
Max Obs: 6.0000 F-statistic (robust): 12.463
P-value 0.0000
Time periods: 6 Distribution: F(7,123)
Avg Obs: 27.000
Min Obs: 27.000
Max Obs: 27.000
Parameter Estimates
================================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
--------------------------------------------------------------------------------
desi_conn 0.0065 0.0414 0.1560 0.8763 -0.0755 0.0884
desi_idt 0.5896 0.0897 6.5724 0.0000 0.4120 0.7671
desi_dps 0.3281 0.1561 2.1017 0.0376 0.0191 0.6371
desi_hc -0.0967 0.1794 -0.5393 0.5907 -0.4517 0.2583
health 0.0820 0.1075 0.7629 0.4470 -0.1308 0.2948
unemployment -0.0786 0.0519 -1.5136 0.1327 -0.1814 0.0242
education -0.3281 0.2140 -1.5332 0.1278 -0.7518 0.0955
================================================================================
F-test for Poolability: 103.37
P-value: 0.0000
Distribution: F(31,123)
Included effects: Entity, Time
# Same specification as model2, but standard errors are clustered by entity.
model2_robust_entity = PanelOLS.from_formula(
    'le ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
).fit(cov_type="clustered", cluster_entity=True)
print(model2_robust_entity)
PanelOLS Estimation Summary
================================================================================
Dep. Variable: le R-squared: 0.4150
Estimator: PanelOLS R-squared (Between): 0.1541
No. Observations: 162 R-squared (Within): -5.2924
Date: Thu, Jun 13 2024 R-squared (Overall): 0.1538
Time: 10:31:34 Log-likelihood -78.789
Cov. Estimator: Clustered
F-statistic: 12.463
Entities: 27 P-value 0.0000
Avg Obs: 6.0000 Distribution: F(7,123)
Min Obs: 6.0000
Max Obs: 6.0000 F-statistic (robust): 5.3704
P-value 0.0000
Time periods: 6 Distribution: F(7,123)
Avg Obs: 27.000
Min Obs: 27.000
Max Obs: 27.000
Parameter Estimates
================================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
--------------------------------------------------------------------------------
desi_conn 0.0065 0.0487 0.1325 0.8948 -0.0900 0.1029
desi_idt 0.5896 0.1286 4.5842 0.0000 0.3350 0.8442
desi_dps 0.3281 0.2093 1.5677 0.1195 -0.0862 0.7423
desi_hc -0.0967 0.2867 -0.3373 0.7364 -0.6643 0.4708
health 0.0820 0.1564 0.5243 0.6010 -0.2276 0.3917
unemployment -0.0786 0.0354 -2.2205 0.0282 -0.1487 -0.0085
education -0.3281 0.2597 -1.2636 0.2088 -0.8421 0.1859
================================================================================
F-test for Poolability: 103.37
P-value: 0.0000
Distribution: F(31,123)
Included effects: Entity, Time
# Same specification as model2, with standard errors clustered by both
# entity and time period.
model2_robust_entity_time = PanelOLS.from_formula(
    'le ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
).fit(cov_type="clustered", cluster_entity=True, cluster_time=True)
print(model2_robust_entity_time)
PanelOLS Estimation Summary
================================================================================
Dep. Variable: le R-squared: 0.4150
Estimator: PanelOLS R-squared (Between): 0.1541
No. Observations: 162 R-squared (Within): -5.2924
Date: Thu, Jun 13 2024 R-squared (Overall): 0.1538
Time: 10:31:34 Log-likelihood -78.789
Cov. Estimator: Clustered
F-statistic: 12.463
Entities: 27 P-value 0.0000
Avg Obs: 6.0000 Distribution: F(7,123)
Min Obs: 6.0000
Max Obs: 6.0000 F-statistic (robust): 14.565
P-value 0.0000
Time periods: 6 Distribution: F(7,123)
Avg Obs: 27.000
Min Obs: 27.000
Max Obs: 27.000
Parameter Estimates
================================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
--------------------------------------------------------------------------------
desi_conn 0.0065 0.0542 0.1191 0.9054 -0.1008 0.1138
desi_idt 0.5896 0.1004 5.8713 0.0000 0.3908 0.7883
desi_dps 0.3281 0.2918 1.1242 0.2631 -0.2496 0.9058
desi_hc -0.0967 0.2637 -0.3668 0.7144 -0.6187 0.4253
health 0.0820 0.1860 0.4410 0.6600 -0.2862 0.4502
unemployment -0.0786 0.0216 -3.6324 0.0004 -0.1215 -0.0358
education -0.3281 0.2080 -1.5773 0.1173 -0.7399 0.0837
================================================================================
F-test for Poolability: 103.37
P-value: 0.0000
Distribution: F(31,123)
Included effects: Entity, Time
# Side-by-side table of the three covariance variants of model2;
# precision='pvalues' puts p-values in the parentheses under each coefficient.
print(
    compare(
        {
            "FE-model": model2,
            "Robust-model(entity)": model2_robust_entity,
            "Robust-model(entity-time)": model2_robust_entity_time,
        },
        precision='pvalues',
    )
)
Model Comparison
======================================================================================
FE-model Robust-model(entity) Robust-model(entity-time)
--------------------------------------------------------------------------------------
Dep. Variable le le le
Estimator PanelOLS PanelOLS PanelOLS
No. Observations 162 162 162
Cov. Est. Unadjusted Clustered Clustered
R-squared 0.4150 0.4150 0.4150
R-Squared (Within) -5.2924 -5.2924 -5.2924
R-Squared (Between) 0.1541 0.1541 0.1541
R-Squared (Overall) 0.1538 0.1538 0.1538
F-statistic 12.463 12.463 12.463
P-value (F-stat) 0.0000 0.0000 0.0000
===================== ============= ============= =============
desi_conn 0.0065 0.0065 0.0065
(0.8763) (0.8948) (0.9054)
desi_idt 0.5896 0.5896 0.5896
(1.259e-09) (1.103e-05) (3.763e-08)
desi_dps 0.3281 0.3281 0.3281
(0.0376) (0.1195) (0.2631)
desi_hc -0.0967 -0.0967 -0.0967
(0.5907) (0.7364) (0.7144)
health 0.0820 0.0820 0.0820
(0.4470) (0.6010) (0.6600)
unemployment -0.0786 -0.0786 -0.0786
(0.1327) (0.0282) (0.0004)
education -0.3281 -0.3281 -0.3281
(0.1278) (0.2088) (0.1173)
======================= =============== =============== ===============
Effects Entity Entity Entity
Time Time Time
--------------------------------------------------------------------------------------
P-values reported in parentheses
# Verify each DESI-component coefficient of model2 in turn; the remaining
# three components are passed as extra regressors to partial out.
target = 'le'
for var_to_check, other_vars in (
    ('desi_conn', ['desi_idt', 'desi_dps', 'desi_hc']),
    ('desi_idt', ['desi_conn', 'desi_dps', 'desi_hc']),
    ('desi_dps', ['desi_conn', 'desi_idt', 'desi_hc']),
    ('desi_hc', ['desi_conn', 'desi_idt', 'desi_dps']),
):
    check_groups(model2, var_to_check, target, other_vars)
desi_conn Manual calculation: 0.006456409482128464 Model coeff: 0.0064564094821282485 desi_idt Manual calculation: 0.5895773771768185 Model coeff: 0.5895773771768196 desi_dps Manual calculation: 0.32808049800709094 Model coeff: 0.32808049800709244 desi_hc Manual calculation: -0.09672010276064617 Model coeff: -0.09672010276064694
# Panel regression of `eys` on the four DESI components and controls with both
# entity and time effects; covariance estimator left at the library default.
model3 = PanelOLS.from_formula(
    'eys ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
).fit()
print(model3)
PanelOLS Estimation Summary
================================================================================
Dep. Variable: eys R-squared: 0.2304
Estimator: PanelOLS R-squared (Between): -0.0186
No. Observations: 162 R-squared (Within): 0.2635
Date: Thu, Jun 13 2024 R-squared (Overall): -0.0185
Time: 10:31:36 Log-likelihood 72.800
Cov. Estimator: Unadjusted
F-statistic: 5.2602
Entities: 27 P-value 0.0000
Avg Obs: 6.0000 Distribution: F(7,123)
Min Obs: 6.0000
Max Obs: 6.0000 F-statistic (robust): 5.2602
P-value 0.0000
Time periods: 6 Distribution: F(7,123)
Avg Obs: 27.000
Min Obs: 27.000
Max Obs: 27.000
Parameter Estimates
================================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
--------------------------------------------------------------------------------
desi_conn 0.0281 0.0162 1.7281 0.0865 -0.0041 0.0602
desi_idt 0.0391 0.0352 1.1098 0.2692 -0.0306 0.1087
desi_dps -0.1117 0.0612 -1.8236 0.0706 -0.2329 0.0095
desi_hc 0.1031 0.0704 1.4657 0.1453 -0.0361 0.2424
health 0.1213 0.0422 2.8762 0.0047 0.0378 0.2048
unemployment -0.0698 0.0204 -3.4263 0.0008 -0.1102 -0.0295
education -0.1234 0.0840 -1.4699 0.1441 -0.2896 0.0428
================================================================================
F-test for Poolability: 192.43
P-value: 0.0000
Distribution: F(31,123)
Included effects: Entity, Time
# Same specification as model3, but standard errors are clustered by entity.
model3_robust_entity = PanelOLS.from_formula(
    'eys ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
).fit(cov_type="clustered", cluster_entity=True)
print(model3_robust_entity)
PanelOLS Estimation Summary
================================================================================
Dep. Variable: eys R-squared: 0.2304
Estimator: PanelOLS R-squared (Between): -0.0186
No. Observations: 162 R-squared (Within): 0.2635
Date: Thu, Jun 13 2024 R-squared (Overall): -0.0185
Time: 10:31:36 Log-likelihood 72.800
Cov. Estimator: Clustered
F-statistic: 5.2602
Entities: 27 P-value 0.0000
Avg Obs: 6.0000 Distribution: F(7,123)
Min Obs: 6.0000
Max Obs: 6.0000 F-statistic (robust): 3.6836
P-value 0.0012
Time periods: 6 Distribution: F(7,123)
Avg Obs: 27.000
Min Obs: 27.000
Max Obs: 27.000
Parameter Estimates
================================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
--------------------------------------------------------------------------------
desi_conn 0.0281 0.0209 1.3401 0.1827 -0.0134 0.0695
desi_idt 0.0391 0.0651 0.5997 0.5498 -0.0899 0.1680
desi_dps -0.1117 0.0931 -1.1997 0.2326 -0.2959 0.0726
desi_hc 0.1031 0.0902 1.1427 0.2554 -0.0755 0.2818
health 0.1213 0.0494 2.4565 0.0154 0.0236 0.2191
unemployment -0.0698 0.0307 -2.2706 0.0249 -0.1307 -0.0090
education -0.1234 0.1033 -1.1948 0.2345 -0.3279 0.0810
================================================================================
F-test for Poolability: 192.43
P-value: 0.0000
Distribution: F(31,123)
Included effects: Entity, Time
# Same specification as model3, with standard errors clustered by both
# entity and time period.
model3_robust_entity_time = PanelOLS.from_formula(
    'eys ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
).fit(cov_type="clustered", cluster_entity=True, cluster_time=True)
print(model3_robust_entity_time)
PanelOLS Estimation Summary
================================================================================
Dep. Variable: eys R-squared: 0.2304
Estimator: PanelOLS R-squared (Between): -0.0186
No. Observations: 162 R-squared (Within): 0.2635
Date: Thu, Jun 13 2024 R-squared (Overall): -0.0185
Time: 10:31:36 Log-likelihood 72.800
Cov. Estimator: Clustered
F-statistic: 5.2602
Entities: 27 P-value 0.0000
Avg Obs: 6.0000 Distribution: F(7,123)
Min Obs: 6.0000
Max Obs: 6.0000 F-statistic (robust): 3.8213
P-value 0.0009
Time periods: 6 Distribution: F(7,123)
Avg Obs: 27.000
Min Obs: 27.000
Max Obs: 27.000
Parameter Estimates
================================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
--------------------------------------------------------------------------------
desi_conn 0.0281 0.0167 1.6773 0.0960 -0.0051 0.0612
desi_idt 0.0391 0.0544 0.7178 0.4742 -0.0686 0.1468
desi_dps -0.1117 0.0820 -1.3615 0.1759 -0.2740 0.0507
desi_hc 0.1031 0.1019 1.0118 0.3136 -0.0986 0.3049
health 0.1213 0.0492 2.4651 0.0151 0.0239 0.2187
unemployment -0.0698 0.0223 -3.1309 0.0022 -0.1140 -0.0257
education -0.1234 0.0874 -1.4117 0.1606 -0.2965 0.0496
================================================================================
F-test for Poolability: 192.43
P-value: 0.0000
Distribution: F(31,123)
Included effects: Entity, Time
# Side-by-side table of the three `eys` fits; the dictionary keys become the
# column headers and p-values are reported in parentheses.
eys_fits = {
    "FE-model": model3,
    "Robust-model(entity)": model3_robust_entity,
    "Robust-model(entity-time)": model3_robust_entity_time,
}
eys_comparison = compare(eys_fits, precision='pvalues')
print(eys_comparison)
Model Comparison
=====================================================================================
FE-model Robust-model(entity) Robust-model(entity-time)
-------------------------------------------------------------------------------------
Dep. Variable eys eys eys
Estimator PanelOLS PanelOLS PanelOLS
No. Observations 162 162 162
Cov. Est. Unadjusted Clustered Clustered
R-squared 0.2304 0.2304 0.2304
R-Squared (Within) 0.2635 0.2635 0.2635
R-Squared (Between) -0.0186 -0.0186 -0.0186
R-Squared (Overall) -0.0185 -0.0185 -0.0185
F-statistic 5.2602 5.2602 5.2602
P-value (F-stat) 0.0000 0.0000 0.0000
===================== ============ =========== ===========
desi_conn 0.0281 0.0281 0.0281
(0.0865) (0.1827) (0.0960)
desi_idt 0.0391 0.0391 0.0391
(0.2692) (0.5498) (0.4742)
desi_dps -0.1117 -0.1117 -0.1117
(0.0706) (0.2326) (0.1759)
desi_hc 0.1031 0.1031 0.1031
(0.1453) (0.2554) (0.3136)
health 0.1213 0.1213 0.1213
(0.0047) (0.0154) (0.0151)
unemployment -0.0698 -0.0698 -0.0698
(0.0008) (0.0249) (0.0022)
education -0.1234 -0.1234 -0.1234
(0.1441) (0.2345) (0.1606)
======================= ============== ============= =============
Effects Entity Entity Entity
Time Time Time
-------------------------------------------------------------------------------------
P-values reported in parentheses
# Call check_groups on model3 once per DESI regressor, in the listed order.
# For each call the three remaining DESI regressors are passed as `other_vars`
# (their relative order is kept).
target = 'eys'
desi_regressors = ['desi_conn', 'desi_idt', 'desi_dps', 'desi_hc']
for var_to_check in desi_regressors:
    other_vars = [name for name in desi_regressors if name != var_to_check]
    check_groups(model3, var_to_check, target, other_vars)
desi_conn Manual calculation: 0.028057255846445122 Model coeff: 0.028057255846445035 desi_idt Manual calculation: 0.0390558233372044 Model coeff: 0.039055823337204996 desi_dps Manual calculation: -0.11167477699862385 Model coeff: -0.11167477699862337 desi_hc Manual calculation: 0.10312430686054608 Model coeff: 0.10312430686054501
# Fit the `mys` specification with entity and time effects (requested inside
# the formula) using the default covariance settings of `fit()`.
mys_spec = PanelOLS.from_formula(
    'mys ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
)
model4 = mys_spec.fit()
print(model4)
PanelOLS Estimation Summary
================================================================================
Dep. Variable: mys R-squared: 0.3070
Estimator: PanelOLS R-squared (Between): -0.0209
No. Observations: 162 R-squared (Within): -0.2977
Date: Thu, Jun 13 2024 R-squared (Overall): -0.0209
Time: 10:31:37 Log-likelihood 221.12
Cov. Estimator: Unadjusted
F-statistic: 7.7853
Entities: 27 P-value 0.0000
Avg Obs: 6.0000 Distribution: F(7,123)
Min Obs: 6.0000
Max Obs: 6.0000 F-statistic (robust): 7.7853
P-value 0.0000
Time periods: 6 Distribution: F(7,123)
Avg Obs: 27.000
Min Obs: 27.000
Max Obs: 27.000
Parameter Estimates
================================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
--------------------------------------------------------------------------------
desi_conn 0.0032 0.0065 0.4910 0.6243 -0.0097 0.0161
desi_idt 0.0153 0.0141 1.0842 0.2804 -0.0126 0.0432
desi_dps -0.0511 0.0245 -2.0845 0.0392 -0.0996 -0.0026
desi_hc 0.0280 0.0282 0.9934 0.3224 -0.0278 0.0837
health -0.0104 0.0169 -0.6168 0.5385 -0.0438 0.0230
unemployment -0.0451 0.0082 -5.5294 0.0000 -0.0612 -0.0290
education 0.1039 0.0336 3.0906 0.0025 0.0373 0.1704
================================================================================
F-test for Poolability: 637.25
P-value: 0.0000
Distribution: F(31,123)
Included effects: Entity, Time
# Fit the `mys` specification with a clustered covariance estimator,
# clustering on entity.
mys_entity_clustered_spec = PanelOLS.from_formula(
    'mys ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
)
model4_robust_entity = mys_entity_clustered_spec.fit(
    cov_type="clustered",
    cluster_entity=True,
)
print(model4_robust_entity)
PanelOLS Estimation Summary
================================================================================
Dep. Variable: mys R-squared: 0.3070
Estimator: PanelOLS R-squared (Between): -0.0209
No. Observations: 162 R-squared (Within): -0.2977
Date: Thu, Jun 13 2024 R-squared (Overall): -0.0209
Time: 10:31:37 Log-likelihood 221.12
Cov. Estimator: Clustered
F-statistic: 7.7853
Entities: 27 P-value 0.0000
Avg Obs: 6.0000 Distribution: F(7,123)
Min Obs: 6.0000
Max Obs: 6.0000 F-statistic (robust): 2.4435
P-value 0.0222
Time periods: 6 Distribution: F(7,123)
Avg Obs: 27.000
Min Obs: 27.000
Max Obs: 27.000
Parameter Estimates
================================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
--------------------------------------------------------------------------------
desi_conn 0.0032 0.0070 0.4557 0.6494 -0.0107 0.0171
desi_idt 0.0153 0.0224 0.6827 0.4961 -0.0290 0.0596
desi_dps -0.0511 0.0363 -1.4065 0.1621 -0.1230 0.0208
desi_hc 0.0280 0.0345 0.8102 0.4194 -0.0404 0.0963
health -0.0104 0.0179 -0.5829 0.5610 -0.0458 0.0250
unemployment -0.0451 0.0162 -2.7841 0.0062 -0.0772 -0.0130
education 0.1039 0.0608 1.7093 0.0899 -0.0164 0.2242
================================================================================
F-test for Poolability: 637.25
P-value: 0.0000
Distribution: F(31,123)
Included effects: Entity, Time
# Same `mys` specification, now with the clustered covariance estimator
# clustering on both the entity and the time dimension.
mys_two_way_clustered_spec = PanelOLS.from_formula(
    'mys ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
)
model4_robust_entity_time = mys_two_way_clustered_spec.fit(
    cov_type="clustered",
    cluster_entity=True,
    cluster_time=True,
)
print(model4_robust_entity_time)
PanelOLS Estimation Summary
================================================================================
Dep. Variable: mys R-squared: 0.3070
Estimator: PanelOLS R-squared (Between): -0.0209
No. Observations: 162 R-squared (Within): -0.2977
Date: Thu, Jun 13 2024 R-squared (Overall): -0.0209
Time: 10:31:38 Log-likelihood 221.12
Cov. Estimator: Clustered
F-statistic: 7.7853
Entities: 27 P-value 0.0000
Avg Obs: 6.0000 Distribution: F(7,123)
Min Obs: 6.0000
Max Obs: 6.0000 F-statistic (robust): 4.0875
P-value 0.0005
Time periods: 6 Distribution: F(7,123)
Avg Obs: 27.000
Min Obs: 27.000
Max Obs: 27.000
Parameter Estimates
================================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
--------------------------------------------------------------------------------
desi_conn 0.0032 0.0060 0.5321 0.5956 -0.0087 0.0151
desi_idt 0.0153 0.0188 0.8143 0.4170 -0.0219 0.0524
desi_dps -0.0511 0.0322 -1.5878 0.1149 -0.1148 0.0126
desi_hc 0.0280 0.0290 0.9649 0.3365 -0.0294 0.0854
health -0.0104 0.0177 -0.5897 0.5565 -0.0454 0.0245
unemployment -0.0451 0.0137 -3.2802 0.0013 -0.0723 -0.0179
education 0.1039 0.0495 2.0984 0.0379 0.0059 0.2019
================================================================================
F-test for Poolability: 637.25
P-value: 0.0000
Distribution: F(31,123)
Included effects: Entity, Time
# Side-by-side table of the three `mys` fits; the dictionary keys become the
# column headers and p-values are reported in parentheses.
mys_fits = {
    "FE-model": model4,
    "Robust-model(entity)": model4_robust_entity,
    "Robust-model(entity-time)": model4_robust_entity_time,
}
mys_comparison = compare(mys_fits, precision='pvalues')
print(mys_comparison)
Model Comparison
======================================================================================
FE-model Robust-model(entity) Robust-model(entity-time)
--------------------------------------------------------------------------------------
Dep. Variable mys mys mys
Estimator PanelOLS PanelOLS PanelOLS
No. Observations 162 162 162
Cov. Est. Unadjusted Clustered Clustered
R-squared 0.3070 0.3070 0.3070
R-Squared (Within) -0.2977 -0.2977 -0.2977
R-Squared (Between) -0.0209 -0.0209 -0.0209
R-Squared (Overall) -0.0209 -0.0209 -0.0209
F-statistic 7.7853 7.7853 7.7853
P-value (F-stat) 0.0000 0.0000 0.0000
===================== ============= =========== ===========
desi_conn 0.0032 0.0032 0.0032
(0.6243) (0.6494) (0.5956)
desi_idt 0.0153 0.0153 0.0153
(0.2804) (0.4961) (0.4170)
desi_dps -0.0511 -0.0511 -0.0511
(0.0392) (0.1621) (0.1149)
desi_hc 0.0280 0.0280 0.0280
(0.3224) (0.4194) (0.3365)
health -0.0104 -0.0104 -0.0104
(0.5385) (0.5610) (0.5565)
unemployment -0.0451 -0.0451 -0.0451
(1.837e-07) (0.0062) (0.0013)
education 0.1039 0.1039 0.1039
(0.0025) (0.0899) (0.0379)
======================= =============== ============= =============
Effects Entity Entity Entity
Time Time Time
--------------------------------------------------------------------------------------
P-values reported in parentheses
# Call check_groups on model4 once per DESI regressor, in the listed order.
# For each call the three remaining DESI regressors are passed as `other_vars`
# (their relative order is kept).
target = 'mys'
desi_regressors = ['desi_conn', 'desi_idt', 'desi_dps', 'desi_hc']
for var_to_check in desi_regressors:
    other_vars = [name for name in desi_regressors if name != var_to_check]
    check_groups(model4, var_to_check, target, other_vars)
desi_conn Manual calculation: 0.0031911750019771906 Model coeff: 0.0031911750019773316 desi_idt Manual calculation: 0.015272599852596696 Model coeff: 0.015272599852596418 desi_dps Manual calculation: -0.05109764047758645 Model coeff: -0.05109764047758676 desi_hc Manual calculation: 0.02798003855050323 Model coeff: 0.0279800385505033
# Fit the `gnipc` specification with entity and time effects (requested inside
# the formula) using the default covariance settings of `fit()`.
gnipc_spec = PanelOLS.from_formula(
    'gnipc ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
)
model5 = gnipc_spec.fit()
print(model5)
PanelOLS Estimation Summary
================================================================================
Dep. Variable: gnipc R-squared: 0.2930
Estimator: PanelOLS R-squared (Between): 0.1479
No. Observations: 162 R-squared (Within): 0.1735
Date: Thu, Jun 13 2024 R-squared (Overall): 0.1480
Time: 10:31:40 Log-likelihood -1412.0
Cov. Estimator: Unadjusted
F-statistic: 7.2835
Entities: 27 P-value 0.0000
Avg Obs: 6.0000 Distribution: F(7,123)
Min Obs: 6.0000
Max Obs: 6.0000 F-statistic (robust): 7.2835
P-value 0.0000
Time periods: 6 Distribution: F(7,123)
Avg Obs: 27.000
Min Obs: 27.000
Max Obs: 27.000
Parameter Estimates
================================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
--------------------------------------------------------------------------------
desi_conn 262.41 155.20 1.6907 0.0934 -44.812 569.62
desi_idt -1375.0 336.39 -4.0874 0.0001 -2040.9 -709.11
desi_dps 2497.9 585.38 4.2672 0.0000 1339.2 3656.7
desi_hc -363.02 672.57 -0.5398 0.5903 -1694.3 968.29
health -1149.4 403.16 -2.8510 0.0051 -1947.5 -351.37
unemployment 107.87 194.79 0.5538 0.5807 -277.70 493.44
education -2770.6 802.56 -3.4522 0.0008 -4359.2 -1182.0
================================================================================
F-test for Poolability: 152.39
P-value: 0.0000
Distribution: F(31,123)
Included effects: Entity, Time
# Fit the `gnipc` specification with a clustered covariance estimator,
# clustering on entity.
gnipc_entity_clustered_spec = PanelOLS.from_formula(
    'gnipc ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
)
model5_robust_entity = gnipc_entity_clustered_spec.fit(
    cov_type="clustered",
    cluster_entity=True,
)
print(model5_robust_entity)
PanelOLS Estimation Summary
================================================================================
Dep. Variable: gnipc R-squared: 0.2930
Estimator: PanelOLS R-squared (Between): 0.1479
No. Observations: 162 R-squared (Within): 0.1735
Date: Thu, Jun 13 2024 R-squared (Overall): 0.1480
Time: 10:31:40 Log-likelihood -1412.0
Cov. Estimator: Clustered
F-statistic: 7.2835
Entities: 27 P-value 0.0000
Avg Obs: 6.0000 Distribution: F(7,123)
Min Obs: 6.0000
Max Obs: 6.0000 F-statistic (robust): 1.1888
P-value 0.3141
Time periods: 6 Distribution: F(7,123)
Avg Obs: 27.000
Min Obs: 27.000
Max Obs: 27.000
Parameter Estimates
================================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
--------------------------------------------------------------------------------
desi_conn 262.41 311.50 0.8424 0.4012 -354.20 879.01
desi_idt -1375.0 929.79 -1.4788 0.1417 -3215.4 465.48
desi_dps 2497.9 1802.3 1.3860 0.1683 -1069.5 6065.4
desi_hc -363.02 563.66 -0.6440 0.5207 -1478.7 752.70
health -1149.4 609.81 -1.8849 0.0618 -2356.5 57.665
unemployment 107.87 204.50 0.5275 0.5988 -296.92 512.66
education -2770.6 1648.0 -1.6812 0.0953 -6032.8 491.59
================================================================================
F-test for Poolability: 152.39
P-value: 0.0000
Distribution: F(31,123)
Included effects: Entity, Time
# Same `gnipc` specification, now with the clustered covariance estimator
# clustering on both the entity and the time dimension.
# NOTE(review): the summary recorded for this fit reports a negative robust
# F-statistic (-2.4624, p-value 1.0000) with only 6 time periods. That suggests
# the two-way clustered covariance matrix is not positive semi-definite here,
# so inference from this particular fit should be treated with caution.
# TODO confirm and consider an alternative covariance estimator.
gnipc_two_way_clustered_spec = PanelOLS.from_formula(
    'gnipc ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
)
model5_robust_entity_time = gnipc_two_way_clustered_spec.fit(
    cov_type="clustered",
    cluster_entity=True,
    cluster_time=True,
)
print(model5_robust_entity_time)
PanelOLS Estimation Summary
================================================================================
Dep. Variable: gnipc R-squared: 0.2930
Estimator: PanelOLS R-squared (Between): 0.1479
No. Observations: 162 R-squared (Within): 0.1735
Date: Thu, Jun 13 2024 R-squared (Overall): 0.1480
Time: 10:31:40 Log-likelihood -1412.0
Cov. Estimator: Clustered
F-statistic: 7.2835
Entities: 27 P-value 0.0000
Avg Obs: 6.0000 Distribution: F(7,123)
Min Obs: 6.0000
Max Obs: 6.0000 F-statistic (robust): -2.4624
P-value 1.0000
Time periods: 6 Distribution: F(7,123)
Avg Obs: 27.000
Min Obs: 27.000
Max Obs: 27.000
Parameter Estimates
================================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
--------------------------------------------------------------------------------
desi_conn 262.41 283.69 0.9250 0.3568 -299.14 823.95
desi_idt -1375.0 787.76 -1.7454 0.0834 -2934.3 184.34
desi_dps 2497.9 1352.6 1.8468 0.0672 -179.42 5175.3
desi_hc -363.02 414.05 -0.8768 0.3823 -1182.6 456.56
health -1149.4 530.17 -2.1680 0.0321 -2198.8 -99.976
unemployment 107.87 140.81 0.7661 0.4451 -170.85 386.59
education -2770.6 1493.9 -1.8546 0.0661 -5727.8 186.55
================================================================================
F-test for Poolability: 152.39
P-value: 0.0000
Distribution: F(31,123)
Included effects: Entity, Time
# Side-by-side table of the three `gnipc` fits; the dictionary keys become the
# column headers and p-values are reported in parentheses.
gnipc_fits = {
    "FE-model": model5,
    "Robust-model(entity)": model5_robust_entity,
    "Robust-model(entity-time)": model5_robust_entity_time,
}
gnipc_comparison = compare(gnipc_fits, precision='pvalues')
print(gnipc_comparison)
Model Comparison
======================================================================================
FE-model Robust-model(entity) Robust-model(entity-time)
--------------------------------------------------------------------------------------
Dep. Variable gnipc gnipc gnipc
Estimator PanelOLS PanelOLS PanelOLS
No. Observations 162 162 162
Cov. Est. Unadjusted Clustered Clustered
R-squared 0.2930 0.2930 0.2930
R-Squared (Within) 0.1735 0.1735 0.1735
R-Squared (Between) 0.1479 0.1479 0.1479
R-Squared (Overall) 0.1480 0.1480 0.1480
F-statistic 7.2835 7.2835 7.2835
P-value (F-stat) 0.0000 0.0000 0.0000
===================== ============= =========== ===========
desi_conn 262.41 262.41 262.41
(0.0934) (0.4012) (0.3568)
desi_idt -1375.0 -1375.0 -1375.0
(7.815e-05) (0.1417) (0.0834)
desi_dps 2497.9 2497.9 2497.9
(3.913e-05) (0.1683) (0.0672)
desi_hc -363.02 -363.02 -363.02
(0.5903) (0.5207) (0.3823)
health -1149.4 -1149.4 -1149.4
(0.0051) (0.0618) (0.0321)
unemployment 107.87 107.87 107.87
(0.5807) (0.5988) (0.4451)
education -2770.6 -2770.6 -2770.6
(0.0008) (0.0953) (0.0661)
======================= =============== ============= =============
Effects Entity Entity Entity
Time Time Time
--------------------------------------------------------------------------------------
P-values reported in parentheses
# Call check_groups on model5 once per DESI regressor, in the listed order.
# For each call the three remaining DESI regressors are passed as `other_vars`
# (their relative order is kept).
target = 'gnipc'
desi_regressors = ['desi_conn', 'desi_idt', 'desi_dps', 'desi_hc']
for var_to_check in desi_regressors:
    other_vars = [name for name in desi_regressors if name != var_to_check]
    check_groups(model5, var_to_check, target, other_vars)
desi_conn Manual calculation: 262.4061188607826 Model coeff: 262.4061188607813 desi_idt Manual calculation: -1374.980072271474 Model coeff: -1374.9800722714735 desi_dps Manual calculation: 2497.9494924253463 Model coeff: 2497.9494924253477 desi_hc Manual calculation: -363.022081003925 Model coeff: -363.0220810039283
Оценка модели HDI и I-desi¶
# Build the design matrix for the `hdi` specification and tabulate one
# variance inflation factor per design-matrix column.
# NOTE(review): the design matrix includes an Intercept column, so the table
# has an Intercept row as well; that row is not normally read as a
# collinearity signal for a regressor - confirm before interpreting it.
y, X = dmatrices(
    'hdi ~ desi_conn + desi_idt + desi_dps + desi_hc + unemployment + education + health',
    data=final_df,
    return_type='dataframe',
)
design_values = X.values
vif = pd.DataFrame({
    'VIF': [
        variance_inflation_factor(design_values, column_position)
        for column_position in range(design_values.shape[1])
    ],
    'variable': X.columns,
})
vif
| VIF | variable | |
|---|---|---|
| 0 | 72.768942 | Intercept |
| 1 | 1.891819 | desi_conn |
| 2 | 4.180753 | desi_idt |
| 3 | 3.829759 | desi_dps |
| 4 | 3.621648 | desi_hc |
| 5 | 1.085186 | unemployment |
| 6 | 1.448257 | education |
| 7 | 1.513656 | health |